Diffstat (limited to 'contrib/llvm/lib'; each row lists file mode, path, and lines changed)
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp402
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp304
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp138
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp652
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp104
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp1213
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequency.cpp59
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp363
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp165
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp148
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp1412
-rw-r--r--contrib/llvm/lib/Analysis/DIBuilder.cpp839
-rw-r--r--contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp224
-rw-r--r--contrib/llvm/lib/Analysis/DebugInfo.cpp948
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp232
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp340
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp608
-rw-r--r--contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp101
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp609
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp272
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp647
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp2526
-rw-r--r--contrib/llvm/lib/Analysis/Interval.cpp58
-rw-r--r--contrib/llvm/lib/Analysis/IntervalPartition.cpp114
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp1128
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/LibCallSemantics.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp655
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp236
-rw-r--r--contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp358
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp419
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp422
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp176
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp213
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp1469
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp87
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp88
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp442
-rw-r--r--contrib/llvm/lib/Analysis/PathNumbering.cpp522
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileInfo.cpp434
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileVerifier.cpp207
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp426
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfo.cpp1105
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp157
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp267
-rw-r--r--contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp382
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp851
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp220
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp6432
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp1403
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp184
-rw-r--r--contrib/llvm/lib/Analysis/SparsePropagation.cpp347
-rw-r--r--contrib/llvm/lib/Analysis/Trace.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp300
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp1798
-rw-r--r--contrib/llvm/lib/Archive/Archive.cpp261
-rw-r--r--contrib/llvm/lib/Archive/ArchiveInternals.h89
-rw-r--r--contrib/llvm/lib/Archive/ArchiveReader.cpp630
-rw-r--r--contrib/llvm/lib/Archive/ArchiveWriter.cpp489
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp830
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.h93
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp3742
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h373
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h149
-rw-r--r--contrib/llvm/lib/AsmParser/Parser.cpp62
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp88
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp2824
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h278
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp40
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp1628
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp41
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp494
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h153
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp962
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h184
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp79
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.h73
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp303
-rw-r--r--contrib/llvm/lib/CodeGen/AntiDepBreaker.h71
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp87
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp1996
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp233
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp419
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp368
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h433
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp152
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp1054
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h280
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp2755
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h531
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp739
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h242
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp166
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp1671
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.h123
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp219
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp182
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp425
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp663
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h110
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp201
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp685
-rw-r--r--contrib/llvm/lib/CodeGen/ELF.h227
-rw-r--r--contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp205
-rw-r--r--contrib/llvm/lib/CodeGen/ELFCodeEmitter.h78
-rw-r--r--contrib/llvm/lib/CodeGen/ELFWriter.cpp1105
-rw-r--r--contrib/llvm/lib/CodeGen/ELFWriter.h251
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp213
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp416
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp1513
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp1058
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.cpp166
-rw-r--r--contrib/llvm/lib/CodeGen/InterferenceCache.h204
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp570
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp498
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp152
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp966
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h70
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp775
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp2171
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp325
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.h264
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp327
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.h206
-rw-r--r--contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp770
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp359
-rw-r--r--contrib/llvm/lib/CodeGen/LowerSubregs.cpp223
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp791
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp113
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp535
-rw-r--r--contrib/llvm/lib/CodeGen/MachineDominators.cpp59
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp744
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp58
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp60
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp1743
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp1211
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp83
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp567
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp229
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp372
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp610
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp1218
-rw-r--r--contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp141
-rw-r--r--contrib/llvm/lib/CodeGen/OcamlGC.cpp37
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp190
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp434
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.h25
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp73
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp465
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp712
-rw-r--r--contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp295
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp852
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.h177
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp134
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.h193
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp587
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp1073
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp1429
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp1544
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp723
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp112
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.h121
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp1797
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.h243
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp395
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp1012
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.h338
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp607
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp68
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp697
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h211
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp99
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp243
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp7976
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp1373
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp456
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp900
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h142
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3893
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp1456
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp2887
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1153
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h720
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp480
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp337
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2568
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h114
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h54
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp644
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp265
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2994
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp802
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h165
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp6655
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp6602
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h548
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp2805
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp302
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp3370
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/ShadowStackGC.cpp441
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp1152
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp588
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp179
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.cpp352
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.h142
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.cpp241
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.h46
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp1386
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h469
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.cpp827
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.h101
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp260
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp768
-rw-r--r--contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp829
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp938
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp443
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp1160
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp1530
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp215
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp371
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.h528
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp2633
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegRewriter.h32
-rw-r--r--contrib/llvm/lib/CompilerDriver/Action.cpp134
-rw-r--r--contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp61
-rw-r--r--contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp655
-rw-r--r--contrib/llvm/lib/CompilerDriver/Main.cpp146
-rw-r--r--contrib/llvm/lib/CompilerDriver/Tool.cpp95
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp1130
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp254
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp1350
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp491
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp98
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h242
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp161
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp821
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.h226
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp211
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h116
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp598
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h73
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp1308
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp727
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp192
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp161
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp221
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h91
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h69
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp95
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h152
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp524
-rw-r--r--contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp90
-rw-r--r--contrib/llvm/lib/Linker/LinkArchives.cpp198
-rw-r--r--contrib/llvm/lib/Linker/LinkItems.cpp241
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp968
-rw-r--r--contrib/llvm/lib/Linker/Linker.cpp174
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp1730
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.h414
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp137
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp37
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp61
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp1256
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp977
-rw-r--r--contrib/llvm/lib/MC/MCCodeEmitter.cpp18
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp315
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler.cpp14
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp164
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.h96
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp419
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h269
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInfo.h84
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp212
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.h182
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp315
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.h91
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp210
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDToken.h139
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp1086
-rw-r--r--contrib/llvm/lib/MC/MCELF.cpp72
-rw-r--r--contrib/llvm/lib/MC/MCELF.h35
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp386
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.h140
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp605
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp66
-rw-r--r--contrib/llvm/lib/MC/MCInstPrinter.cpp25
-rw-r--r--contrib/llvm/lib/MC/MCLabel.cpp21
-rw-r--r--contrib/llvm/lib/MC/MCLoggingStreamer.cpp249
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp420
-rw-r--r--contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp104
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp254
-rw-r--r--contrib/llvm/lib/MC/MCObjectWriter.cpp80
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp441
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp2731
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp442
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp668
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp533
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp27
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp48
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCPureStreamer.cpp234
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp84
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp150
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp303
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp534
-rw-r--r--contrib/llvm/lib/MC/MCSubtargetInfo.cpp96
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp84
-rw-r--r--contrib/llvm/lib/MC/MCValue.cpp36
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp258
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp785
-rw-r--r--contrib/llvm/lib/MC/SubtargetFeature.cpp397
-rw-r--r--contrib/llvm/lib/MC/TargetAsmBackend.cpp37
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp887
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp406
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp96
-rw-r--r--contrib/llvm/lib/Object/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp455
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp740
-rw-r--r--contrib/llvm/lib/Object/Error.cpp57
-rw-r--r--contrib/llvm/lib/Object/MachOObject.cpp370
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp469
-rw-r--r--contrib/llvm/lib/Object/Makefile14
-rw-r--r--contrib/llvm/lib/Object/Object.cpp68
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp61
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp3616
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp2947
-rw-r--r--contrib/llvm/lib/Support/APSInt.cpp23
-rw-r--r--contrib/llvm/lib/Support/Allocator.cpp188
-rw-r--r--contrib/llvm/lib/Support/Atomic.cpp112
-rw-r--r--contrib/llvm/lib/Support/BranchProbability.cpp44
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp1426
-rw-r--r--contrib/llvm/lib/Support/ConstantRange.cpp702
-rw-r--r--contrib/llvm/lib/Support/CrashRecoveryContext.cpp281
-rw-r--r--contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp357
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp134
-rw-r--r--contrib/llvm/lib/Support/DeltaAlgorithm.cpp114
-rw-r--r--contrib/llvm/lib/Support/Disassembler.cpp75
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp658
-rw-r--r--contrib/llvm/lib/Support/DynamicLibrary.cpp170
-rw-r--r--contrib/llvm/lib/Support/Errno.cpp74
-rw-r--r--contrib/llvm/lib/Support/ErrorHandling.cpp99
-rw-r--r--contrib/llvm/lib/Support/FileUtilities.cpp281
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp426
-rw-r--r--contrib/llvm/lib/Support/FormattedStream.cpp101
-rw-r--r--contrib/llvm/lib/Support/GraphWriter.cpp200
-rw-r--r--contrib/llvm/lib/Support/Host.cpp315
-rw-r--r--contrib/llvm/lib/Support/IncludeFile.cpp20
-rw-r--r--contrib/llvm/lib/Support/IntEqClasses.cpp70
-rw-r--r--contrib/llvm/lib/Support/IntervalMap.cpp161
-rw-r--r--contrib/llvm/lib/Support/IsInf.cpp49
-rw-r--r--contrib/llvm/lib/Support/IsNAN.cpp33
-rw-r--r--contrib/llvm/lib/Support/ManagedStatic.cpp75
-rw-r--r--contrib/llvm/lib/Support/Memory.cpp74
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp376
-rw-r--r--contrib/llvm/lib/Support/MemoryObject.cpp34
-rw-r--r--contrib/llvm/lib/Support/Mutex.cpp157
-rw-r--r--contrib/llvm/lib/Support/Path.cpp303
-rw-r--r--contrib/llvm/lib/Support/PathV2.cpp774
-rw-r--r--contrib/llvm/lib/Support/PluginLoader.cpp47
-rw-r--r--contrib/llvm/lib/Support/PrettyStackTrace.cpp133
-rw-r--r--contrib/llvm/lib/Support/Process.cpp33
-rw-r--r--contrib/llvm/lib/Support/Program.cpp56
-rw-r--r--contrib/llvm/lib/Support/RWMutex.cpp157
-rw-r--r--contrib/llvm/lib/Support/Regex.cpp168
-rw-r--r--contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp73
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp34
-rw-r--r--contrib/llvm/lib/Support/SmallPtrSet.cpp229
-rw-r--r--contrib/llvm/lib/Support/SmallVector.cpp40
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp232
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp152
-rw-r--r--contrib/llvm/lib/Support/StringExtras.cpp81
-rw-r--r--contrib/llvm/lib/Support/StringMap.cpp230
-rw-r--r--contrib/llvm/lib/Support/StringPool.cpp35
-rw-r--r--contrib/llvm/lib/Support/StringRef.cpp415
-rw-r--r--contrib/llvm/lib/Support/SystemUtils.cpp55
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp92
-rw-r--r--contrib/llvm/lib/Support/ThreadLocal.cpp84
-rw-r--r--contrib/llvm/lib/Support/Threading.cpp116
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp57
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp393
-rw-r--r--contrib/llvm/lib/Support/ToolOutputFile.cpp43
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp640
-rw-r--r--contrib/llvm/lib/Support/Twine.cpp165
-rw-r--r--contrib/llvm/lib/Support/Unix/Host.inc69
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc151
-rw-r--r--contrib/llvm/lib/Support/Unix/Mutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc890
-rw-r--r--contrib/llvm/lib/Support/Unix/PathV2.inc507
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc295
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc430
-rw-r--r--contrib/llvm/lib/Support/Unix/README.txt16
-rw-r--r--contrib/llvm/lib/Support/Unix/RWMutex.inc43
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc303
-rw-r--r--contrib/llvm/lib/Support/Unix/ThreadLocal.inc26
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc56
-rw-r--r--contrib/llvm/lib/Support/Unix/Unix.h87
-rw-r--r--contrib/llvm/lib/Support/Unix/system_error.inc34
-rw-r--r--contrib/llvm/lib/Support/Valgrind.cpp54
-rw-r--r--contrib/llvm/lib/Support/Windows/DynamicLibrary.inc137
-rw-r--r--contrib/llvm/lib/Support/Windows/Host.inc23
-rw-r--r--contrib/llvm/lib/Support/Windows/Memory.inc73
-rw-r--r--contrib/llvm/lib/Support/Windows/Mutex.inc58
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc931
-rw-r--r--contrib/llvm/lib/Support/Windows/PathV2.inc757
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc222
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc405
-rw-r--r--contrib/llvm/lib/Support/Windows/RWMutex.inc58
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc328
-rw-r--r--contrib/llvm/lib/Support/Windows/ThreadLocal.inc54
-rw-r--r--contrib/llvm/lib/Support/Windows/TimeValue.inc51
-rw-r--r--contrib/llvm/lib/Support/Windows/Windows.h120
-rw-r--r--contrib/llvm/lib/Support/Windows/explicit_symbols.inc66
-rw-r--r--contrib/llvm/lib/Support/Windows/system_error.inc142
-rw-r--r--contrib/llvm/lib/Support/circular_raw_ostream.cpp45
-rw-r--r--contrib/llvm/lib/Support/raw_os_ostream.cpp30
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp763
-rw-r--r--contrib/llvm/lib/Support/regcclass.h70
-rw-r--r--contrib/llvm/lib/Support/regcname.h139
-rw-r--r--contrib/llvm/lib/Support/regcomp.c1525
-rw-r--r--contrib/llvm/lib/Support/regengine.inc1034
-rw-r--r--contrib/llvm/lib/Support/regerror.c135
-rw-r--r--contrib/llvm/lib/Support/regex2.h157
-rw-r--r--contrib/llvm/lib/Support/regex_impl.h108
-rw-r--r--contrib/llvm/lib/Support/regexec.c162
-rw-r--r--contrib/llvm/lib/Support/regfree.c72
-rw-r--r--contrib/llvm/lib/Support/regstrlcpy.c52
-rw-r--r--contrib/llvm/lib/Support/regutils.h53
-rw-r--r--contrib/llvm/lib/Support/system_error.cpp130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td251
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAddressingModes.h595
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp516
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp1834
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h127
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInfo.h294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp2584
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h503
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp1287
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h216
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h131
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td164
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp1898
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp1906
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h122
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1326
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp2099
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFixupKinds.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1086
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp219
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp120
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp3011
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp7978
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h509
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td1930
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp60
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td4076
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td4937
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1397
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td3467
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1128
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h183
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp1839
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp1314
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.h76
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp125
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp389
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h250
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td332
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRelocations.h62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td261
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td1034
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td1827
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td294
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp198
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h51
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp214
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h252
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp189
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h143
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp47
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h38
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp154
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp2390
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp589
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h99
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp3818
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h336
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h2459
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp759
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h120
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp78
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp144
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp331
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp149
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp361
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp100
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp714
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h69
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp252
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp610
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h78
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp881
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.td68
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp166
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp66
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td38
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp143
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp426
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp956
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h142
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp383
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td1157
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp215
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h60
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td133
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSchedule.td85
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp36
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp53
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h65
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp57
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/Blackfin.td202
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp156
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td30
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp130
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h47
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp180
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp643
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h83
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp256
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td862
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp104
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h32
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td34
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp360
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h81
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td277
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h49
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp41
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h67
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp60
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/CBackend/CBackend.cpp3611
-rw-r--r--contrib/llvm/lib/Target/CBackend/CTargetMachine.h41
-rw-r--r--contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td449
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp56
-rw-r--r--contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.td66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td408
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp334
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td57
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp275
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h94
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp141
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h41
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp1203
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp3257
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h186
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td320
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp451
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h84
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td4482
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h49
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNodes.td159
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUOperands.td664
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp373
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h107
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td183
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h19
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSchedule.td59
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h97
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp67
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h90
-rw-r--r--contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp1971
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h43
-rw-r--r--contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp128
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp567
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp707
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp69
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h42
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.td73
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp162
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp335
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td28
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp258
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp450
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp277
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp1147
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h185
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td219
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td229
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td204
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp296
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h296
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td884
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp113
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h33
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td131
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp222
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp166
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h170
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp363
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h85
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td148
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td236
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td267
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp63
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h75
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp102
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h84
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp90
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h40
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp65
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp113
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h47
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td66
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp179
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp180
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp223
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp492
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp1197
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h182
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp336
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h92
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp150
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h50
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h46
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp254
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h71
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td85
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp32
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h42
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp51
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h69
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp235
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp127
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h100
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h39
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h34
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td101
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp440
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td86
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp82
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp94
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp117
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp331
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h50
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp482
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp2371
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h191
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td374
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td227
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp461
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h160
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td881
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp118
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h43
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h114
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp278
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h73
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td198
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td63
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h128
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp88
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h82
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp102
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h41
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/PTX/CMakeLists.txt26
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp35
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp60
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h48
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td135
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp605
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXCallingConv.td29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp182
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp347
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h70
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp410
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.h133
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1102
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td84
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp541
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp92
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h91
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp51
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h65
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td555
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp66
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h121
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp87
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h77
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/Makefile15
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp25
-rwxr-xr-xcontrib/llvm/lib/Target/PTX/generate-register-td.py163
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp295
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp62
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp70
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h41
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h85
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td112
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp123
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp696
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp261
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h45
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp978
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h322
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp308
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h73
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp1087
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp5768
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h486
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td749
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td695
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp655
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h149
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td1476
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp468
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h49
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp173
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPredicates.h39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp714
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h78
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td326
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td505
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td64
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td74
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td80
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td84
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp141
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h151
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp135
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h94
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp323
-rw-r--r--contrib/llvm/lib/Target/Sparc/FPMover.cpp141
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h29
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp57
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.h109
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td72
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp251
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp80
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp212
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp1273
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h103
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td114
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp346
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h100
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td825
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h47
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp140
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td159
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp44
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h58
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp65
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h79
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp32
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h38
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h52
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td61
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp223
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td46
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp779
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp867
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h145
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td340
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td133
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp439
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h113
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td1147
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h51
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td325
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp158
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td205
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp54
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h67
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/Target.cpp102
-rw-r--r--contrib/llvm/lib/Target/TargetAsmInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/TargetAsmLexer.cpp14
-rw-r--r--contrib/llvm/lib/Target/TargetData.cpp589
-rw-r--r--contrib/llvm/lib/Target/TargetELFWriterInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/TargetFrameLowering.cpp53
-rw-r--r--contrib/llvm/lib/Target/TargetInstrInfo.cpp174
-rw-r--r--contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/TargetLibraryInfo.cpp74
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp360
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp315
-rw-r--r--contrib/llvm/lib/Target/TargetRegisterInfo.cpp141
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp165
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp1141
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp560
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h155
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c1610
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h575
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h379
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp140
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h86
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp260
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h25
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp143
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h96
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp138
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp185
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h60
-rw-r--r--contrib/llvm/lib/Target/X86/SSEDomainFix.cpp506
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp190
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h87
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h91
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td234
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmBackend.cpp453
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp728
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h87
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td401
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp1005
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp153
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp2133
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupKinds.h33
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp1709
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1210
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h69
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp2253
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp13157
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h955
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td102
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td1125
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h184
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td104
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td1675
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td151
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td60
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td648
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td535
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp3240
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h886
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td1648
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td454
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td5865
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td746
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td429
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td54
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.cpp574
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.h81
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp1044
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp692
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp554
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h135
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp973
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h157
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td467
-rw-r--r--contrib/llvm/lib/Target/X86/X86Relocations.h52
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp259
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h56
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp323
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h259
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp245
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h135
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp121
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h60
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp56
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h40
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.td46
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp280
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreCallingConv.td36
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp387
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h59
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp295
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp1608
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h201
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td120
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp398
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h88
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td1210
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp340
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h85
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td56
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp28
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h42
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp44
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h62
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp65
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp906
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp190
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp1003
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp380
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp211
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp2856
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp279
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp112
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp118
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp571
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp191
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp248
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp547
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp868
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp182
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp256
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp73
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp413
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h367
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp697
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp2306
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp1317
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp1771
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp2919
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp615
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp731
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp900
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp876
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp753
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp1153
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp575
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h106
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp1677
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp117
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp672
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp33
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h108
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp225
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp1425
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp170
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h36
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp152
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp1161
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp91
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp207
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp135
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp727
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp470
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp2287
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1871
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp1594
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp808
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp248
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp634
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp170
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp353
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp3911
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp193
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1063
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp139
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp986
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp3595
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp1123
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp2010
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp189
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp2674
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp331
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp2391
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp274
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp373
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp634
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp582
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp550
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp182
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp386
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp479
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp538
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp127
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp795
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp1182
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp279
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp875
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp753
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp395
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp603
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp323
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1134
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp520
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp2779
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp94
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp142
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp201
-rw-r--r--contrib/llvm/lib/VMCore/AsmWriter.cpp2038
-rw-r--r--contrib/llvm/lib/VMCore/Attributes.cpp353
-rw-r--r--contrib/llvm/lib/VMCore/AutoUpgrade.cpp281
-rw-r--r--contrib/llvm/lib/VMCore/BasicBlock.cpp346
-rw-r--r--contrib/llvm/lib/VMCore/ConstantFold.cpp2344
-rw-r--r--contrib/llvm/lib/VMCore/ConstantFold.h56
-rw-r--r--contrib/llvm/lib/VMCore/Constants.cpp2173
-rw-r--r--contrib/llvm/lib/VMCore/ConstantsContext.h709
-rw-r--r--contrib/llvm/lib/VMCore/Core.cpp2217
-rw-r--r--contrib/llvm/lib/VMCore/DebugInfoProbe.cpp225
-rw-r--r--contrib/llvm/lib/VMCore/DebugLoc.cpp344
-rw-r--r--contrib/llvm/lib/VMCore/Dominators.cpp106
-rw-r--r--contrib/llvm/lib/VMCore/Function.cpp445
-rw-r--r--contrib/llvm/lib/VMCore/GVMaterializer.cpp18
-rw-r--r--contrib/llvm/lib/VMCore/Globals.cpp263
-rw-r--r--contrib/llvm/lib/VMCore/IRBuilder.cpp149
-rw-r--r--contrib/llvm/lib/VMCore/InlineAsm.cpp294
-rw-r--r--contrib/llvm/lib/VMCore/Instruction.cpp414
-rw-r--r--contrib/llvm/lib/VMCore/Instructions.cpp3187
-rw-r--r--contrib/llvm/lib/VMCore/IntrinsicInst.cpp73
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContext.cpp147
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContextImpl.cpp94
-rw-r--r--contrib/llvm/lib/VMCore/LLVMContextImpl.h238
-rw-r--r--contrib/llvm/lib/VMCore/LeakDetector.cpp69
-rw-r--r--contrib/llvm/lib/VMCore/LeaksContext.h92
-rw-r--r--contrib/llvm/lib/VMCore/Metadata.cpp562
-rw-r--r--contrib/llvm/lib/VMCore/Module.cpp553
-rw-r--r--contrib/llvm/lib/VMCore/Pass.cpp293
-rw-r--r--contrib/llvm/lib/VMCore/PassManager.cpp1882
-rw-r--r--contrib/llvm/lib/VMCore/PassRegistry.cpp208
-rw-r--r--contrib/llvm/lib/VMCore/PrintModulePass.cpp101
-rw-r--r--contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h118
-rw-r--r--contrib/llvm/lib/VMCore/Type.cpp687
-rw-r--r--contrib/llvm/lib/VMCore/Use.cpp144
-rw-r--r--contrib/llvm/lib/VMCore/User.cpp79
-rw-r--r--contrib/llvm/lib/VMCore/Value.cpp632
-rw-r--r--contrib/llvm/lib/VMCore/ValueSymbolTable.cpp117
-rw-r--r--contrib/llvm/lib/VMCore/ValueTypes.cpp212
-rw-r--r--contrib/llvm/lib/VMCore/Verifier.cpp1870
1235 files changed, 564076 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 000000000000..c189a0042928
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,402 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface, which is the
+// common interface used by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->alias(LocA, LocB);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->pointsToConstantMemory(Loc, OrLocal);
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->copyValue(From, To);
+}
+
+void AliasAnalysis::addEscapingUse(Use &U) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->addEscapingUse(U);
+}
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (onlyReadsMemory(MRB))
+ Mask = Ref;
+
+ if (onlyAccessesArgPointees(MRB)) {
+ bool doesAlias = false;
+ if (doesAccessArgPointees(MRB)) {
+ MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ const Value *Arg = *AI;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CSLoc(Arg, UnknownSize, CSTag);
+ if (!isNoAlias(CSLoc, Loc)) {
+ doesAlias = true;
+ break;
+ }
+ }
+ }
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If Loc is a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(Loc))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (onlyReadsMemory(CS1B))
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (onlyAccessesArgPointees(CS2B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS2B)) {
+ MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS2Loc(Arg, UnknownSize, CS2Tag);
+ R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(CS1B)) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ if (doesAccessArgPointees(CS1B)) {
+ MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ Location CS1Loc(Arg, UnknownSize, CS1Tag);
+ if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ }
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is the end of the chain, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getModRefBehavior(F);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ return Location(LI->getPointerOperand(),
+ getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ return Location(SI->getPointerOperand(),
+ getTypeStoreSize(SI->getValueOperand()->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+ return Location(VI->getPointerOperand(),
+ UnknownSize,
+ VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (L->isVolatile())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(getLocation(L), Loc))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+ // Be conservative in the face of volatile.
+ if (S->isVolatile())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a store just writes.
+ return Mod;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+ TD = P->getAnalysisIfAvailable<TargetData>();
+ AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>(); // All AA's chain
+}
+
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the memory location Loc.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+ const Location &Loc) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the memory location Loc. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+ const Instruction &I2,
+ const Location &Loc) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(I, Loc) & Mod)
+ return true;
+ return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return ImmutableCallSite(cast<Instruction>(V))
+ .paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions (but not Global Aliases)
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+ return true;
+ if (isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
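
The interface added above is deliberately a chain: each implementation answers what it can and forwards the rest to the next AliasAnalysis in the group. For orientation, here is a minimal sketch of a client pass driving that interface; the pass name and the counting logic are hypothetical, and only the AliasAnalysis calls (getAnalysis<AliasAnalysis>, getLocation, alias) come from the file above.

// Hypothetical client pass; only the AliasAnalysis API usage mirrors the
// interface added above.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  struct AliasQueryDemo : public FunctionPass {
    static char ID;
    AliasQueryDemo() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AliasAnalysis>();  // whichever AA is in the chain
      AU.setPreservesAll();
    }

    virtual bool runOnFunction(Function &F) {
      AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
      // Count load/store pairs the chained AA can prove independent.
      unsigned Independent = 0;
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        if (LoadInst *LI = dyn_cast<LoadInst>(&*I))
          for (inst_iterator J = inst_begin(F); J != E; ++J)
            if (StoreInst *SI = dyn_cast<StoreInst>(&*J))
              if (AA.alias(AA.getLocation(LI), AA.getLocation(SI)) ==
                    AliasAnalysis::NoAlias)
                ++Independent;
      errs() << F.getName() << ": " << Independent
             << " provably independent load/store pairs\n";
      return false;
    }
  };
}

char AliasQueryDemo::ID = 0;
static RegisterPass<AliasQueryDemo>
X("aa-query-demo", "Demonstrate chained AliasAnalysis queries", false, true);
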
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 000000000000..d947220e078d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+ class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
+ unsigned No, May, Partial, Must;
+ unsigned NoMR, JustRef, JustMod, MR;
+ Module *M;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasAnalysisCounter() : ModulePass(ID) {
+ initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+ No = May = Partial = Must = 0;
+ NoMR = JustRef = JustMod = MR = 0;
+ }
+
+ void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+ errs() << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
+ }
+ ~AliasAnalysisCounter() {
+ unsigned AASum = No+May+Partial+Must;
+ unsigned MRSum = NoMR+JustRef+JustMod+MR;
+ if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ errs() << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted:\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
+ if (AASum) {
+ printLine("no alias", No, AASum);
+ printLine("may alias", May, AASum);
+ printLine("partial alias", Partial, AASum);
+ printLine("must alias", Must, AASum);
+ errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/"
+ << Partial*100/AASum << "%/"
+ << Must*100/AASum<<"%\n\n";
+ }
+
+ errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ if (MRSum) {
+ printLine("no mod/ref", NoMR, MRSum);
+ printLine("ref", JustRef, MRSum);
+ printLine("mod", JustMod, MRSum);
+ printLine("mod/ref", MR, MRSum);
+ errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
+ << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+ << "%/" << MR*100/MRSum <<"%\n\n";
+ }
+ }
+ }
+
+ bool runOnModule(Module &M) {
+ this->M = &M;
+ InitializeAliasAnalysis(this);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ // FIXME: We could count these too...
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // Forwarding functions: just delegate to a real AA implementation, counting
+ // the number of responses...
+ AliasResult alias(const Location &LocA, const Location &LocB);
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ };
+}
+
+char AliasAnalysisCounter::ID = 0;
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false)
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+ return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
+
+ const char *AliasString;
+ switch (R) {
+ default: llvm_unreachable("Unknown alias type!");
+ case NoAlias: No++; AliasString = "No alias"; break;
+ case MayAlias: May++; AliasString = "May alias"; break;
+ case PartialAlias: Partial++; AliasString = "Partial alias"; break;
+ case MustAlias: Must++; AliasString = "Must alias"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+ errs() << AliasString << ":\t";
+ errs() << "[" << LocA.Size << "B] ";
+ WriteAsOperand(errs(), LocA.Ptr, true, M);
+ errs() << ", ";
+ errs() << "[" << LocB.Size << "B] ";
+ WriteAsOperand(errs(), LocB.Ptr, true, M);
+ errs() << "\n";
+ }
+
+ return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
+
+ const char *MRString;
+ switch (R) {
+ default: llvm_unreachable("Unknown mod/ref type!");
+ case NoModRef: NoMR++; MRString = "NoModRef"; break;
+ case Ref: JustRef++; MRString = "JustRef"; break;
+ case Mod: JustMod++; MRString = "JustMod"; break;
+ case ModRef: MR++; MRString = "ModRef"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == ModRef)) {
+ errs() << MRString << ": Ptr: ";
+ errs() << "[" << Loc.Size << "B] ";
+ WriteAsOperand(errs(), Loc.Ptr, true, M);
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
+ }
+ return R;
+}
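
AliasAnalysisCounter joins the AliasAnalysis analysis group, answers nothing itself, and forwards every query while tallying the responses. The same multiple-inheritance pattern is used by every chained AA in this import; the sketch below shows its skeleton with a hypothetical pass (TrivialAA) that resolves only the trivial identical-location case and defers everything else. Registration boilerplate (INITIALIZE_AG_PASS) is omitted.

// Hypothetical chained AA implementation (TrivialAA); the structure mirrors
// AliasAnalysisCounter above, registration boilerplate omitted.
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

namespace {
  struct TrivialAA : public ImmutablePass, public AliasAnalysis {
    static char ID;
    TrivialAA() : ImmutablePass(ID) {}

    virtual void initializePass() {
      InitializeAliasAnalysis(this);          // hook into the AA chain
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AliasAnalysis::getAnalysisUsage(AU);    // require the next AA
    }

    // Required because of the Pass + AliasAnalysis multiple inheritance;
    // same idiom as getAdjustedAnalysisPointer above.
    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
      if (PI == &AliasAnalysis::ID)
        return (AliasAnalysis*)this;
      return this;
    }

    virtual AliasResult alias(const Location &LocA, const Location &LocB) {
      // The one case this pass decides itself: identical locations.
      if (LocA.Ptr == LocB.Ptr && LocA.Size == LocB.Size)
        return MustAlias;
      return AliasAnalysis::alias(LocA, LocB); // otherwise ask the chain
    }
  };
}

char TrivialAA::ID = 0;
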
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 000000000000..1afc1b71d93e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,304 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// For each function in the program, it queries the alias analysis
+// implementation with every pair of pointers in the function and records
+// how those queries are answered.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+namespace {
+ class AAEval : public FunctionPass {
+ unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
+ unsigned NoModRef, Mod, Ref, ModRef;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) {
+ NoAlias = MayAlias = PartialAlias = MustAlias = 0;
+ NoModRef = Mod = Ref = ModRef = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ WriteAsOperand(os1, V1, true, M);
+ WriteAsOperand(os2, V2, true, M);
+ }
+
+ if (o2 < o1)
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": Ptr: ";
+ WriteAsOperand(errs(), Ptr, true, M);
+ errs() << "\t<->" << *I << '\n';
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ SetVector<Value *> Pointers;
+ SetVector<CallSite> CallSites;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ if (I->getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
+ Pointers.insert(&*I);
+ Instruction &Inst = *I;
+ if (CallSite CS = cast<Value>(&Inst)) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+      // Consider the call arguments.
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (isInterestingPointer(*AI))
+ Pointers.insert(*AI);
+ CallSites.insert(CS);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+ // iterate over the worklist, and run the full (n^2)/2 disambiguations
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ uint64_t I1Size = AliasAnalysis::UnknownSize;
+ const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ uint64_t I2Size = AliasAnalysis::UnknownSize;
+      const Type *I2ElTy =
+        cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case AliasAnalysis::NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAlias; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ uint64_t Size = AliasAnalysis::UnknownSize;
+ const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRef; break;
+ default:
+ errs() << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAlias << " no alias responses ";
+ PrintPercent(NoAlias, AliasSum);
+ errs() << " " << MayAlias << " may alias responses ";
+ PrintPercent(MayAlias, AliasSum);
+ errs() << " " << PartialAlias << " partial alias responses ";
+ PrintPercent(PartialAlias, AliasSum);
+ errs() << " " << MustAlias << " must alias responses ";
+ PrintPercent(MustAlias, AliasSum);
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << PartialAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ if (ModRefSum == 0) {
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ } else {
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRef << " no mod/ref responses ";
+ PrintPercent(NoModRef, ModRefSum);
+ errs() << " " << Mod << " mod responses ";
+ PrintPercent(Mod, ModRefSum);
+ errs() << " " << Ref << " ref responses ";
+ PrintPercent(Ref, ModRefSum);
+ errs() << " " << ModRef << " mod & ref responses ";
+ PrintPercent(ModRef, ModRefSum);
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ }
+
+ return false;
+}
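
PrintPercent above formats one fractional digit using only integer arithmetic. Below is a standalone check of that trick, with arbitrarily chosen numbers; note the digit is truncated, not rounded.

// Standalone illustration of the fixed-point trick used by PrintPercent
// above; the fractional digit is truncated, not rounded.
#include <cassert>

int main() {
  unsigned Num = 37, Sum = 160;                   // exact value: 23.125%
  unsigned Whole = Num * 100ULL / Sum;            // 23
  unsigned Tenth = (Num * 1000ULL / Sum) % 10;    // 1 -> prints "23.1%"
  assert(Whole == 23 && Tenth == 1);
  return 0;
}
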
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 000000000000..f15c05153e10
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,138 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes, keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ class AliasDebugger : public ModulePass, public AliasAnalysis {
+
+    // What we do is simple.  Keep track of every value the AA could
+    // know about, and verify that queries are one of those.
+    // A query to a value that didn't exist when the AA was created
+    // means someone forgot to update the AA when creating new values.
+
+ std::set<const Value*> Vals;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasDebugger() : ModulePass(ID) {
+ initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+
+      for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ Vals.insert(&*I);
+ for (User::const_op_iterator OI = I->op_begin(),
+ OE = I->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+
+    for (Module::iterator I = M.begin(),
+          E = M.end(); I != E; ++I){
+      Vals.insert(&*I);
+      if (!I->isDeclaration()) {
+ for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+ AI != AE; ++AI)
+ Vals.insert(&*AI);
+ for (Function::const_iterator FI = I->begin(), FE = I->end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ Vals.insert(&*BI);
+ for (User::const_op_iterator OI = BI->op_begin(),
+ OE = BI->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
+ }
+
+ }
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB) {
+ assert(Vals.find(LocA.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ assert(Vals.find(LocB.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ return AliasAnalysis::alias(LocA, LocB);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+ }
+
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ virtual void deleteValue(Value *V) {
+ assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+ AliasAnalysis::deleteValue(V);
+ }
+ virtual void copyValue(Value *From, Value *To) {
+ Vals.insert(To);
+ AliasAnalysis::copyValue(From, To);
+ }
+
+ };
+}
+
+char AliasDebugger::ID = 0;
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false)
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
+
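
AliasDebugger records every value visible when it is initialized and asserts if a query mentions anything else. The fragment below is a hypothetical client bug of the kind -debug-aa catches: cloning a pointer-producing instruction and querying the analysis before announcing the new value via copyValue.

// Hypothetical transform fragment; the AA calls are the ones defined in
// this import (copyValue, alias, Location).
#include "llvm/Instruction.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

static bool mayAliasAfterClone(Instruction *OldPtr, AliasAnalysis &AA,
                               const AliasAnalysis::Location &OtherLoc) {
  Instruction *NewPtr = OldPtr->clone();   // AA has never seen NewPtr
  NewPtr->insertAfter(OldPtr);

  // Querying AA with NewPtr at this point is exactly the bug -debug-aa
  // reports ("Never seen value in AA before"). Announce the value first:
  AA.copyValue(OldPtr, NewPtr);

  return AA.alias(AliasAnalysis::Location(NewPtr), OtherLoc) !=
         AliasAnalysis::NoAlias;
}
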
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 000000000000..2ed694941212
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,652 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ AccessTy |= AS.AccessTy;
+ AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
+
+ if (AliasTy == MustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(AliasAnalysis::Location(L->getValue(),
+ L->getSize(),
+ L->getTBAAInfo()),
+ AliasAnalysis::Location(R->getValue(),
+ R->getSize(),
+ R->getTBAAInfo()))
+ != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ }
+
+ if (CallSites.empty()) { // Merge call sites...
+ if (!AS.CallSites.empty())
+ std::swap(CallSites, AS.CallSites);
+ } else if (!AS.CallSites.empty()) {
+ CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
+ AS.CallSites.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = 0;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ }
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = 0;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ uint64_t Size, const MDNode *TBAAInfo,
+ bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasAnalysis::AliasResult Result =
+ AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+ P->getTBAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ if (Result != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ addRef(); // Entry points to alias set.
+}
+
+void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
+ CallSites.push_back(CS.getInstruction());
+
+ AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ AliasTy = MayAlias;
+ AccessTy |= Refs;
+ return;
+ }
+
+ // FIXME: This should use mod/ref information to make this not suck so bad
+ AliasTy = MayAlias;
+ AccessTy = ModRef;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo,
+ AliasAnalysis &AA) const {
+ if (AliasTy == MustAlias) {
+ assert(CallSites.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set.
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+ return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+ SomePtr->getSize(),
+ SomePtr->getTBAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ }
+
+  // If this is a may-alias set, we have to check each pointer in the set to
+  // see whether the given pointer aliases any of them...
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ AliasAnalysis::Location(I.getPointer(), I.getSize(),
+ I.getTBAAInfo())))
+ return true;
+
+ // Check the call sites list and invoke list...
+ if (!CallSites.empty()) {
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
+ if (AA.getModRefInfo(CallSites[i],
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
+ if (AA.doesNotAccessMemory(CS))
+ return false;
+
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+ AliasAnalysis::NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ uint64_t Size,
+ const MDNode *TBAAInfo) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ return true;
+ return false;
+}
+
+
+
+AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesCallSite(CS, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ return FoundSet;
+}
+
+
+
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const MDNode *TBAAInfo,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ // Add it to the alias set it aliases.
+ AS->addPointer(*this, Entry, Size, TBAAInfo);
+ return *AS;
+ }
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ return AliasSets.back();
+}
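+
+// A minimal client-side sketch of the query above (the helper name and the
+// printing choice are illustrative, not prescribed by this file):
+//
+//   static void inspect(Value *Ptr, uint64_t Size, const MDNode *TBAA,
+//                       AliasSetTracker &AST) {
+//     bool New = false;
+//     AliasSet &AS = AST.getAliasSetForPointer(Ptr, Size, TBAA, &New);
+//     if (New)        // Ptr didn't alias anything tracked so far.
+//       AS.print(errs());
+//     // AS.isMustAlias(), AS.isVolatile(), etc. can now be queried.
+//   }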
+
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ bool NewPtr;
+ addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ bool NewPtr;
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ AA.getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Refs, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ bool NewPtr;
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ AA.getTypeStoreSize(Val->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::Mods, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::ModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(CallSite CS) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (AA.doesNotAccessMemory(CS))
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (AS) {
+ AS->addCallSite(CS, AA);
+ return false;
+ }
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return add(CI);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ return add(II);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return true;
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getTBAAInfo(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
+ }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ AS.CallSites.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa));
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(CallSite CS) {
+ if (AA.doesNotAccessMemory(CS))
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return remove(CI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you will end up
+// with dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // Notify the alias analysis implementation that this value is gone.
+ AA.deleteValue(PtrVal);
+
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet (if present).
+ if (CallSite CS = PtrVal) {
+ if (!AA.doesNotAccessMemory(CS)) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeCallSite(CS);
+ }
+ }
+ }
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // Notify the alias analysis implementation that this value is copied.
+ AA.copyValue(From, To);
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getTBAAInfo(),
+ true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << (void*)this << ", " << RefCount << "] ";
+ OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+ switch (AccessTy) {
+ case NoModRef: OS << "No access "; break;
+ case Refs : OS << "Ref "; break;
+ case Mods : OS << "Mod "; break;
+ case ModRef : OS << "Mod/Ref "; break;
+ default: llvm_unreachable("Bad value for AccessTy!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ WriteAsOperand(OS << "(", I.getPointer());
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!CallSites.empty()) {
+ OS << "\n " << CallSites.size() << " Call Sites: ";
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ WriteAsOperand(OS, CallSites[i]);
+ }
+ }
+ OS << "\n";
+}
+
+void AliasSetTracker::print(raw_ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+void AliasSet::dump() const { print(dbgs()); }
+void AliasSetTracker::dump() const { print(dbgs()); }
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+ // this now dangles!
+}
+
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+ AST->copyValue(getValPtr(), V);
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(errs());
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 000000000000..71e0a832696c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,104 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/Verifier.h"
+#include <cstring>
+
+using namespace llvm;
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAliasAnalysisAnalysisGroup(Registry);
+ initializeAliasAnalysisCounterPass(Registry);
+ initializeAAEvalPass(Registry);
+ initializeAliasDebuggerPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeNoAAPass(Registry);
+ initializeBasicAliasAnalysisPass(Registry);
+ initializeBlockFrequencyPass(Registry);
+ initializeBranchProbabilityInfoPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializePrintDbgInfoPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLibCallAliasAnalysisPass(Registry);
+ initializeLintPass(Registry);
+ initializeLoopDependenceAnalysisPass(Registry);
+ initializeLoopInfoPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializeProfileEstimatorPassPass(Registry);
+ initializeNoProfileInfoPass(Registry);
+ initializeNoPathProfileInfoPass(Registry);
+ initializeProfileInfoAnalysisGroup(Registry);
+ initializePathProfileInfoAnalysisGroup(Registry);
+ initializeLoaderPassPass(Registry);
+ initializePathProfileLoaderPassPass(Registry);
+ initializeProfileVerifierPassPass(Registry);
+ initializePathProfileVerifierPass(Registry);
+ initializeRegionInfoPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeScalarEvolutionPass(Registry);
+ initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ std::string Messages;
+
+ LLVMBool Result = verifyModule(*unwrap(M),
+ static_cast<VerifierFailureAction>(Action),
+ OutMessages? &Messages : 0);
+
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+
+ return Result;
+}
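+
+// A minimal sketch of driving the C binding above (module setup elided; the
+// variable names are illustrative):
+//
+//   char *Msg = NULL;
+//   if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Msg))
+//     fprintf(stderr, "module verification failed: %s\n", Msg);
+//   LLVMDisposeMessage(Msg);   // Msg was strdup'd by LLVMVerifyModule.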
+
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ return verifyFunction(*unwrap<Function>(Fn),
+ static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 000000000000..8330ea7c7036
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1213 @@
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+static bool isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+ // Don't bother analyzing arguments already known not to escape.
+ if (A->hasNoCaptureAttr())
+ return true;
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ }
+ return false;
+}
+
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
+
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
+ const Type *AccessTy;
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->hasDefinitiveInitializer())
+ return AliasAnalysis::UnknownSize;
+ AccessTy = GV->getType()->getElementType();
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ if (!AI->isArrayAllocation())
+ AccessTy = AI->getType()->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else if (const CallInst* CI = extractMallocCall(V)) {
+ if (!isArrayMalloc(V, &TD))
+ // The size is the argument to the malloc call.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
+ return C->getZExtValue();
+ return AliasAnalysis::UnknownSize;
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ AccessTy = cast<PointerType>(A->getType())->getElementType();
+ else
+ return AliasAnalysis::UnknownSize;
+ } else {
+ return AliasAnalysis::UnknownSize;
+ }
+
+ if (AccessTy->isSized())
+ return TD.getTypeAllocSize(AccessTy);
+ return AliasAnalysis::UnknownSize;
+}
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
+
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, recording in Extension whether
+/// we looked through a sign or zero extend. The incoming Value is known to
+/// have IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const TargetData &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale = Scale.trunc(SmallWidth);
+ Offset = Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale = Scale.zext(OldWidth);
+ Offset = Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
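+
+// Worked example for the routine above: for V = add (shl %x, 2), 12 the
+// recursion bottoms out at %x and folds the constants on the way back out,
+// yielding Scale = 4 and Offset = 12, i.e. the linear form 4*%x + 12.  (The
+// name %x is purely illustrative.)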
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that GetUnderlyingObject can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const TargetData *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+      // The only non-operator case we can handle is a GlobalAlias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0) {
+ // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+ // can come up with something. This matches what GetUnderlyingObject does.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+    // If we are lacking TargetData information, we can't compute the offsets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getSExtValue()*Scale;
+ Scale *= IndexScale.getSExtValue();
+
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension, Scale};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
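+
+// Worked example for the decomposition above, assuming TargetData with 4-byte
+// i32 and 64-bit pointers (value names are illustrative):
+//
+//   %p = getelementptr [10 x i32]* %A, i64 0, i64 %i
+//
+// decomposes into base pointer %A, BaseOffs = 0, and a single VariableGEPIndex
+// { V = %i, Extension = EK_NotExtended, Scale = 4 }, i.e. %p == %A + 4*%i.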
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
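+
+// Small example for the subtraction above: with Dest = { %x*4 } and
+// Src = { %x*12 } (same extension kind), the matching entry is updated in
+// place, leaving Dest = { %x*-8 }; had the scales been equal, the entry would
+// simply have been erased.  (%x is an illustrative name.)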
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+namespace {
+ /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ BasicAliasAnalysis() : ImmutablePass(ID),
+ // AliasCache rarely has more than 1 or 2 elements,
+ // so start it off fairly small so that clear()
+ // doesn't have to tromp through 64 (the default)
+ // elements on each alias query. This really wants
+ // something like a SmallDenseMap.
+ AliasCache(8) {
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual AliasResult alias(const Location &LocA,
+ const Location &LocB) {
+ assert(AliasCache.empty() && "AliasCache must be cleared after use!");
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+ LocB.Ptr, LocB.Size, LocB.TBAATag);
+ AliasCache.clear();
+ return Alias;
+ }
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+      // The AliasAnalysis base class has some smarts; let's use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// pointsToConstantMemory - Chase pointers until we find a (constant
+ /// global) or not.
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ // AliasCache - Track alias queries to guard against recursion.
+ typedef std::pair<Location, Location> LocPair;
+ typedef DenseMap<LocPair, AliasResult> AliasCacheTy;
+ AliasCacheTy AliasCache;
+
+ // Visited - Track instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value*, 16> Visited;
+
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+ // instruction against another.
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+ // instruction against another.
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ /// aliasSelect - Disambiguate a Select instruction against another value.
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAATag,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAATag);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+ return new BasicAliasAnalysis();
+}
+
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+ if (!Visited.insert(V)) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Otherwise be conservative.
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
+}
+
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
+ if (F->doesNotAccessMemory())
+ return DoesNotAccessMemory;
+
+ // For intrinsics, we can check the table.
+ if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
+ if (F->onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // Otherwise be conservative.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+}
+
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ "AliasAnalysis query involving multiple functions!");
+
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
+
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+ // the current function not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the pointer
+ // as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
+ !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+ PassedAsArg = true;
+ break;
+ }
+ }
+
+ if (!PassedAsArg)
+ return NoModRef;
+ }
+
+ ModRefResult Min = ModRef;
+
+ // Finally, handle specific knowledge of intrinsics.
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ uint64_t Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ Value *Src = II->getArgOperand(1);
+      // If the loc can't overlap either the dest or the source, then the call
+      // doesn't mod/ref it at all.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ if (isNoAlias(Location(Src, Len), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ } else if (isNoAlias(Location(Src, Len), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ uint64_t Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Location(Dest, Len), Loc))
+ return NoModRef;
+ }
+ // We know that memset doesn't load anything.
+ Min = Mod;
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getArgOperand(0);
+ uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
+ MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
+ if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(1),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ uint64_t PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(Location(II->getArgOperand(2),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vld1: {
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::arm_neon_vst1: {
+ uint64_t Size =
+ TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+ if (isNoAlias(Location(II->getArgOperand(0), Size,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
+ return NoModRef;
+ break;
+ }
+ }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+}
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
+/// UnderlyingV2 is the same for V2.
+///
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ int64_t GEP1BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+ // If we have two gep instructions with must-alias'ing base pointers, figure
+ // out if the indexes to the GEP tell us anything about the derived pointer.
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
+    // If we get a No or May, then return it immediately; no amount of analysis
+    // will improve this situation.
+ if (BaseAlias != MustAlias) return BaseAlias;
+
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ int64_t GEP2BaseOffset;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
+ return MayAlias;
+
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ V2, V2Size, V2TBAAInfo);
+ if (R != MustAlias)
+      // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ return MayAlias;
+ }
+ }
+
+  // In the two-GEP case, if there is no difference in the offsets of the
+  // computed pointers, the resultant pointers are a must alias. This
+  // happens when we have two lexically identical GEPs (for example).
+ //
+  // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+  // must-aliases the GEP's base pointer, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+ return MustAlias;
+
+ // If there is a difference between the pointers, but the difference is
+ // less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return PartialAlias;
+ }
+
+ // If we have a known constant offset, see if this offset is larger than the
+ // access size being queried. If so, and if no variable indices can remove
+ // pieces of this constant, then we know we have a no-alias. For example,
+ // &A[100] != &A.
+
+ // In order to handle cases like &A[100][i] where i is an out of range
+ // subscript, we have to ignore all constant offset pieces that are a multiple
+ // of a scaled index. Do this by removing constant offsets that are a
+  // multiple of any of our variable indices. This allows us to disambiguate
+ // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
+ // provides an offset of 4 bytes (assuming a <= 4 byte access).
+ for (unsigned i = 0, e = GEP1VariableIndices.size();
+ i != e && GEP1BaseOffset;++i)
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
+
+ // If our known offset is bigger than the access size, we know we don't have
+ // an alias.
+ if (GEP1BaseOffset) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return NoAlias;
+ }
+
+ // Statically, we can see that the base objects are the same, but the
+ // pointers have dynamic offsets which we can't resolve. And none of our
+ // little tricks above worked.
+ //
+ // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+ // practical effect of this is protecting TBAA in the case of dynamic
+ // indices into arrays of unions. An alternative way to solve this would
+ // be to have clang emit extra metadata for unions and/or union accesses.
+ // A union-specific solution wouldn't handle the problem for malloc'd
+ // memory however.
+ return PartialAlias;
+}
+
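+// MergeAliasResults - Conservatively combine the results of two subqueries:
+// merging MustAlias with MustAlias yields MustAlias, merging MustAlias with
+// PartialAlias yields PartialAlias, and merging NoAlias with any different
+// result degrades to MayAlias.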
+static AliasAnalysis::AliasResult
+MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+ // If the results agree, take it.
+ if (A == B)
+ return A;
+ // A mix of PartialAlias and MustAlias is PartialAlias.
+ if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
+ (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
+ return AliasAnalysis::PartialAlias;
+ // Otherwise, we don't know anything.
+ return AliasAnalysis::MayAlias;
+}
+
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+ SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+ SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+ }
+
+ // If both arms of the Select node NoAlias or MustAlias V2, then the result
+ // is NoAlias or MustAlias accordingly. Otherwise, return MayAlias.
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ AliasResult Alias =
+ aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+ V2Size, V2TBAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2TBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+ return Alias;
+ }
+
+ SmallPtrSet<Value*, 4> UniqueSrc;
+ SmallVector<Value*, 4> V1Srcs;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PV1 = PN->getIncomingValue(i);
+ if (isa<PHINode>(PV1))
+ // If any of the sources is itself a PHI, return MayAlias conservatively
+ // to avoid a compile-time explosion. The worst possible case is if both
+ // sides are PHI nodes, in which case this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+ if (UniqueSrc.insert(PV1))
+ V1Srcs.push_back(PV1);
+ }
+
+ AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V1Srcs[0], PNSize, PNTBAAInfo);
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ // If all sources of the PHI node NoAlias or MustAlias V2, then the result
+ // is NoAlias or MustAlias accordingly. Otherwise, return MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V, PNSize, PNTBAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = GetUnderlyingObject(V1, TD);
+ const Value *O2 = GetUnderlyingObject(V2, TD);
+
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls
+ // in the same function.
+ if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+ // Note that if the pointers are in different functions, there are a
+ // variety of complications. A call with a nocapture argument may still
+ // temporarily store the nocapture argument's value in a temporary memory
+ // location if that memory location doesn't escape. Or it may pass a
+ // nocapture value to other functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ if (TD)
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+ return NoAlias;
+
+ // Check the cache before climbing up use-def chains. This also terminates
+ // otherwise infinitely recursive queries.
+ LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ if (V1 > V2)
+ std::swap(Locs.first, Locs.second);
+ std::pair<AliasCacheTy::iterator, bool> Pair =
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+ // GEP can't simplify, we don't even look at the PHI cases.
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
+ }
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return AliasCache[Locs] = Result;
+ }
+
+ // If both pointers are pointing into the same object and one of the
+ // accesses covers the entire object, then the accesses must overlap
+ // in some way.
+ if (TD && O1 == O2)
+ if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+ return AliasCache[Locs] = PartialAlias;
+
+ AliasResult Result =
+ AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+ return AliasCache[Locs] = Result;
+}
diff --git a/contrib/llvm/lib/Analysis/BlockFrequency.cpp b/contrib/llvm/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 000000000000..4b86d1db1f04
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+
+char BlockFrequency::ID = 0;
+
+
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+ initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+ delete BFI;
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ BFI->doFunction(&F, &BPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. Never returns 0; the value is
+/// always positive. Note that the entry block frequency is 1024, so the value
+/// itself is meaningless on its own and should only be compared against other
+/// block frequencies. We do this to avoid using floating point.
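+/// For example, if the entry block reports 1024 and a loop body reports 8192,
+/// the loop body is expected to run roughly eight times per function entry;
+/// only such ratios are meaningful.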
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+ return BFI->getBlockFreq(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..e39cd221b5a7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,363 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+ "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfo::ID = 0;
+
+namespace {
+// Please note that BranchProbabilityAnalysis is not a FunctionPass.
+// It is created by BranchProbabilityInfo (which is a FunctionPass), which
+// provides a clear interface. Thanks to that, all heuristics and other
+// private methods are hidden in the .cpp file.
+class BranchProbabilityAnalysis {
+
+ typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+
+ DenseMap<Edge, uint32_t> *Weights;
+
+ BranchProbabilityInfo *BP;
+
+ LoopInfo *LI;
+
+
+ // Weights are for internal use only. They are used by heuristics to help
+ // estimate edge probabilities. Example:
+ //
+ // Using "Loop Branch Heuristics" we predict weights of edges for the
+ // block BB2.
+ // ...
+ // |
+ // V
+ // BB1<-+
+ // | |
+ // | | (Weight = 128)
+ // V |
+ // BB2--+
+ // |
+ // | (Weight = 4)
+ // V
+ // BB3
+ //
+ // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
+ // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+
+ static const uint32_t LBH_TAKEN_WEIGHT = 128;
+ static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+ // Standard weight value. Used when none of the heuristics set weight for
+ // the edge.
+ static const uint32_t NORMAL_WEIGHT = 16;
+
+ // Minimum weight of an edge. Please note, that weight is NEVER 0.
+ static const uint32_t MIN_WEIGHT = 1;
+
+ // Return TRUE if BB leads directly to a Return Instruction.
+ static bool isReturningBlock(BasicBlock *BB) {
+ SmallPtrSet<BasicBlock *, 8> Visited;
+
+ while (true) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ return true;
+
+ if (TI->getNumSuccessors() > 1)
+ break;
+
+ // It is an unreachable block, which we can treat like a return instruction.
+ if (TI->getNumSuccessors() == 0)
+ return true;
+
+ Visited.insert(BB);
+ BB = TI->getSuccessor(0);
+
+ // Stop if cycle is detected.
+ if (Visited.count(BB))
+ return false;
+ }
+
+ return false;
+ }
+
+ // Multiply Edge Weight by two.
+ void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+ uint32_t MaxWeight = getMaxWeightFor(Src);
+
+ if (Weight * 2 > MaxWeight)
+ BP->setEdgeWeight(Src, Dst, MaxWeight);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight * 2);
+ }
+
+ // Divide Edge Weight by two.
+ void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
+ uint32_t Weight = BP->getEdgeWeight(Src, Dst);
+
+ assert(Weight > 0);
+ if (Weight / 2 < MIN_WEIGHT)
+ BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
+ else
+ BP->setEdgeWeight(Src, Dst, Weight / 2);
+ }
+
+
+ uint32_t getMaxWeightFor(BasicBlock *BB) const {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+ }
+
+public:
+ BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
+ BranchProbabilityInfo *BP, LoopInfo *LI)
+ : Weights(W), BP(BP), LI(LI) {
+ }
+
+ // Return Heuristics
+ void calcReturnHeuristics(BasicBlock *BB);
+
+ // Pointer Heuristics
+ void calcPointerHeuristics(BasicBlock *BB);
+
+ // Loop Branch Heuristics
+ void calcLoopBranchHeuristics(BasicBlock *BB);
+
+ bool runOnFunction(Function &F);
+};
+} // end anonymous namespace
+
+// Calculate Edge Weights using "Return Heuristics". Predict a successor which
+// leads directly to Return Instruction will not be taken.
+void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+ if (BB->getTerminator()->getNumSuccessors() == 1)
+ return;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ if (isReturningBlock(Succ)) {
+ decEdgeWeight(BB, Succ);
+ }
+ }
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
+// between two pointer or pointer and NULL will fail.
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI || !CI->isEquality())
+ return;
+
+ Value *LHS = CI->getOperand(0);
+
+ if (!LHS->getType()->isPointerTy())
+ return;
+
+ assert(CI->getOperand(1)->getType()->isPointerTy());
+
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
+
+ // p != 0 -> isProb = true
+ // p == 0 -> isProb = false
+ // p != q -> isProb = true
+ // p == q -> isProb = false;
+ bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
+ if (!isProb)
+ std::swap(Taken, NonTaken);
+
+ incEdgeWeight(BB, Taken);
+ decEdgeWeight(BB, NonTaken);
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
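+// For example, a loop latch with one backedge and one exiting successor gets
+// weight 128 on the edge back to the header and weight 4 on the exiting edge,
+// as in the diagram above.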
+void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+ uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
+
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ SmallVector<BasicBlock *, 8> BackEdges;
+ SmallVector<BasicBlock *, 8> ExitingEdges;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ Loop *SuccL = LI->getLoopFor(Succ);
+ if (SuccL != L)
+ ExitingEdges.push_back(Succ);
+ else if (Succ == L->getHeader())
+ BackEdges.push_back(Succ);
+ }
+
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
+ if (backWeight < NORMAL_WEIGHT)
+ backWeight = NORMAL_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+ EE = BackEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Back = *EI;
+ BP->setEdgeWeight(BB, Back, backWeight);
+ }
+ }
+
+ uint32_t numExitingEdges = ExitingEdges.size();
+ if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+ if (exitWeight < MIN_WEIGHT)
+ exitWeight = MIN_WEIGHT;
+
+ for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+ EE = ExitingEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Exiting = *EI;
+ BP->setEdgeWeight(BB, Exiting, exitWeight);
+ }
+ }
+}
+
+bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // Only LBH uses the setEdgeWeight method.
+ calcLoopBranchHeuristics(BB);
+
+ // PH and RH use only the incEdgeWeight and decEdgeWeight methods, so as
+ // not to efface the LBH results.
+ calcPointerHeuristics(BB);
+ calcReturnHeuristics(BB);
+ }
+
+ return false;
+}
+
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfo::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ BranchProbabilityAnalysis BPA(&Weights, this, &LI);
+ return BPA.runOnFunction(F);
+}
+
+uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+ // Hot probability is strictly greater than 4/5 = 80%.
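+ // For example, an edge with weight 81 out of a block sum of 100 is hot
+ // (81 * 5 = 405 > 100 * 4 = 400), while weight 80 is not (400 > 400 fails).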
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ BasicBlock *MaxSucc = 0;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(BB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+// Return the edge's weight. If it isn't recorded, return DEFAULT_WEIGHT.
+uint32_t
+BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+ Edge E(Src, Dst);
+ DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
+
+ if (I != Weights.end())
+ return I->second;
+
+ return DEFAULT_WEIGHT;
+}
+
+void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
+ uint32_t Weight) {
+ Weights[std::make_pair(Src, Dst)] = Weight;
+ DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
+ << Dst->getNameStr() << " weight to " << Weight
+ << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
+}
+
+
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
+ BasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 000000000000..7bb063fbbbcf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,165 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CFGViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+
+namespace {
+ struct CFGOnlyViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFGOnly();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true)
+
+namespace {
+ struct CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true)
+
+namespace {
+ struct CFGOnlyPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ errs() << "Writing '" << Filename << "'...";
+
+ std::string ErrorInfo;
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, (const Function*)&F, true);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+ void print(raw_ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true)
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getNameStr());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG,
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ ViewGraph(this, "cfg" + getNameStr(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 000000000000..b2c27d1dfc4b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,148 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specifies whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
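+/// For example, storing the pointer itself to memory, or passing it to a call
+/// that does not mark the argument nocapture, generally counts as a capture
+/// below, while loading through the pointer or comparing a noalias call's
+/// result against null does not.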
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+ SmallVector<Use*, Threshold> Worklist;
+ SmallSet<Use*, Threshold> Visited;
+ int Count = 0;
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return true;
+
+ Use *U = &UI.getUse();
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ // The parameter is not marked 'nocapture' - captured.
+ return true;
+ // Only passed via 'nocapture' arguments, or is the called function - not
+ // captured.
+ break;
+ }
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Ret:
+ if (ReturnCaptures)
+ return true;
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be captured.
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ return true;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not captured via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ Worklist.push_back(U);
+ }
+ break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (isNoAliasCall(V->stripPointerCasts()))
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ return true;
+ default:
+ // Something else - be conservative and say it is captured.
+ return true;
+ }
+ }
+
+ // All uses examined - not captured.
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 000000000000..7fca17eb69f6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,1412 @@
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FEnv.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// TargetData. This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+ const TargetData &TD) {
+
+ // This only handles casts to vectors currently.
+ const VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (DestVTy == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ if (CV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, VMCore can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = CV->getNumOperands();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ const Type *SrcEltTy = CV->getType()->getElementType();
+ const Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First things first: we only want to think about integers here, so if
+ // we have something in FP form, recast it as an integer.
+ if (DstEltTy->isFloatingPointTy()) {
+ // Fold to a vector of integers with the same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ const Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+ if (!C) return ConstantExpr::getBitCast(C, DestTy);
+
+ // Finally, VMCore can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer; if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ const Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CV = dyn_cast<ConstantVector>(C);
+ if (!CV) // If VMCore wasn't able to fold it, bail out.
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same total bit width, but with different numbers of elements. Do
+ // the conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ } else {
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+ }
+
+ return ConstantVector::get(Result);
+}
+
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant. Because of
+/// constantexprs, this function is recursive.
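+/// For example, given "getelementptr ([5 x i32]* @a, i32 0, i32 3)" and a
+/// target where i32 occupies 4 bytes, this returns @a with an offset of 12.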
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ int64_t &Offset, const TargetData &TD) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // Cannot compute this if the element type of the pointer is missing size
+ // info.
+ if (!cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return false;
+
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ gep_type_iterator GTI = gep_type_begin(CE);
+ for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
+ i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*i);
+ if (!CI) return false; // Index isn't a simple constant?
+ if (CI->isZero()) continue; // Not adding anything.
+
+ if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+ // N = N + Offset
+ Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
+ } else {
+ const SequentialType *SQT = cast<SequentialType>(*GTI);
+ Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
+/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
+/// pointer to copy results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
+ unsigned char *CurPtr, unsigned BytesLeft,
+ const TargetData &TD) {
+ assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ "Out of range access");
+
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ return true;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getBitWidth() > 64 ||
+ (CI->getBitWidth() & 7) != 0)
+ return false;
+
+ uint64_t Val = CI->getZExtValue();
+ unsigned IntBytes = unsigned(CI->getBitWidth()/8);
+
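+ // For example, an i32 constant 0x12345678 read from ByteOffset 0 fills
+ // CurPtr with the bytes 0x78, 0x56, 0x34, 0x12 (least significant byte
+ // first), matching the little-endian layout the callers rely on.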
+ for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
+ CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
+ ++ByteOffset;
+ }
+ return true;
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isDoubleTy()) {
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ if (CFP->getType()->isFloatTy()){
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
+ return false;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ const StructLayout *SL = TD.getStructLayout(CS->getType());
+ unsigned Index = SL->getElementContainingOffset(ByteOffset);
+ uint64_t CurEltOffset = SL->getElementOffset(Index);
+ ByteOffset -= CurEltOffset;
+
+ while (1) {
+ // If the element access is to the element itself and not to tail padding,
+ // read the bytes from the element.
+ uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+
+ if (ByteOffset < EltSize &&
+ !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
+ BytesLeft, TD))
+ return false;
+
+ ++Index;
+
+ // Check to see if we read from the last struct element, if so we're done.
+ if (Index == CS->getType()->getNumElements())
+ return true;
+
+ // If we read all of the bytes we needed from this element we're done.
+ uint64_t NextEltOffset = SL->getElementOffset(Index);
+
+ if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ return true;
+
+ // Move to the next element of the struct.
+ CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
+ BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ ByteOffset = 0;
+ CurEltOffset = NextEltOffset;
+ }
+ // not reached.
+ }
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CA->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType());
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ for (; Index != CV->getType()->getNumElements(); ++Index) {
+ if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr,
+ BytesLeft, TD))
+ return false;
+ if (EltSize >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= EltSize;
+ CurPtr += EltSize;
+ }
+ return true;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
+ }
+
+ // Otherwise, unknown initializer type.
+ return false;
+}
+
+static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
+ const TargetData &TD) {
+ const Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+
+ // If this isn't an integer load we can't fold it directly.
+ if (!IntType) {
+ // If this is a float/double load, we can try folding it as an int32/64 load
+ // and then bitcast the result. This can be useful for union cases. Note
+ // that address spaces don't matter here since we're not going to produce
+ // an actual new load.
+ const Type *MapTy;
+ if (LoadTy->isFloatTy())
+ MapTy = Type::getInt32PtrTy(C->getContext());
+ else if (LoadTy->isDoubleTy())
+ MapTy = Type::getInt64PtrTy(C->getContext());
+ else if (LoadTy->isVectorTy()) {
+ MapTy = IntegerType::get(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy));
+ MapTy = PointerType::getUnqual(MapTy);
+ } else
+ return 0;
+
+ C = FoldBitCast(C, MapTy, TD);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
+ return FoldBitCast(Res, LoadTy, TD);
+ return 0;
+ }
+
+ unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
+ if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+
+ GlobalValue *GVal;
+ int64_t Offset;
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ return 0;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->getType()->isSized())
+ return 0;
+
+ // If the load starts before the beginning of the global (a negative offset),
+ // some of the loaded bytes may still be valid, but we don't try to handle
+ // this case.
+ if (Offset < 0) return 0;
+
+ // If we're not accessing anything in this constant, the result is undefined.
+ if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ return UndefValue::get(IntType);
+
+ unsigned char RawBytes[32] = {0};
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
+ BytesLoaded, TD))
+ return 0;
+
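+ // Assemble the loaded integer from the raw bytes, most significant byte
+ // first; for example, RawBytes of {0x78, 0x56, 0x34, 0x12} for a 4-byte
+ // load produces the i32 value 0x12345678.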
+ APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded-1-i];
+ }
+
+ return ConstantInt::get(IntType->getContext(), ResultVal);
+}
+
+/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
+/// produce if it is constant and determinable. If this is not determinable,
+/// return null.
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
+ const TargetData *TD) {
+ // First, try the easy cases:
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+
+ // If the loaded value isn't a constant expr, we can't handle it.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return 0;
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return V;
+ }
+
+ // Instead of loading a constant C string, use the corresponding integer
+ // value directly if the string is short enough.
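+ // For example, an i32 load of the constant string "abc" folds (on a
+ // little-endian target) to the immediate 0x00636261, with 'a' in the low
+ // byte.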
+ std::string Str;
+ if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.length();
+ const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+ APInt StrVal(NumBits, 0);
+ APInt SingleChar(NumBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = StrLen-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < StrLen; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ // Append NULL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ if (GV->getInitializer()->isNullValue())
+ return Constant::getNullValue(ResTy);
+ if (isa<UndefValue>(GV->getInitializer()))
+ return UndefValue::get(ResTy);
+ }
+ }
+
+ // Try hard to fold loads from bitcasted strange and non-type-safe things. We
+ // currently don't do any of this for big endian systems. It can be
+ // generalized in the future if someone is interested.
+ if (TD && TD->isLittleEndian())
+ return FoldReinterpretLoadFromConstPtr(CE, *TD);
+ return 0;
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
+ if (LI->isVolatile()) return 0;
+
+ if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+ return ConstantFoldLoadFromConstPtr(C, TD);
+
+ return 0;
+}
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1, const TargetData *TD){
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub && TD) {
+ GlobalValue *GV1, *GV2;
+ int64_t Offs1, Offs2;
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+ GV1 == GV2) {
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+ return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+ }
+ }
+
+ return 0;
+}
+
+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly cast by the
+/// getelementptr.
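+/// For example, on a target with 64-bit pointers an "i32 4" array index is
+/// rewritten as "i64 4" before the getelementptr is re-created.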
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ if (!TD) return 0;
+ const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+ bool Any = false;
+ SmallVector<Constant*, 32> NewIdxs;
+ for (unsigned i = 1; i != NumOps; ++i) {
+ if ((i == 1 ||
+ !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+ reinterpret_cast<Value *const *>(Ops+1),
+ i-1))) &&
+ Ops[i]->getType() != IntPtrTy) {
+ Any = true;
+ NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+ true,
+ IntPtrTy,
+ true),
+ Ops[i], IntPtrTy));
+ } else
+ NewIdxs.push_back(Ops[i]);
+ }
+ if (!Any) return 0;
+
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ return C;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ Constant *Ptr = Ops[0];
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ return 0;
+
+ const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1; i != NumOps; ++i)
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (NumOps == 2 &&
+ cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, TD);
+ return Res;
+ }
+ }
+ return 0;
+ }
+
+ unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ APInt Offset = APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+
+ // If this is a GEP of a GEP, fold it all into a single GEP.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+ // Do not try to incorporate the sub-GEP if some index is not a constant integer.
+ bool AllConstantInt = true;
+ for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+ if (!isa<ConstantInt>(NestedOps[i])) {
+ AllConstantInt = false;
+ break;
+ }
+ if (!AllConstantInt)
+ break;
+
+ Ptr = cast<Constant>(GEP->getOperand(0));
+ Offset += APInt(BitWidth,
+ TD->getIndexedOffset(Ptr->getType(),
+ (Value**)NestedOps.data(),
+ NestedOps.size()));
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ }
+
+ // If the base value for this address is a literal integer value, fold the
+ // getelementptr to the resulting integer value cast to the pointer type.
+ APInt BasePtr(BitWidth, 0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ if (Ptr->isNullValue() || BasePtr != 0) {
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ const Type *Ty = Ptr->getType();
+ SmallVector<Constant*, 32> NewIdxs;
+ do {
+ if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (ATy->isPointerTy()) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return 0;
+ }
+
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ if (ElemSize == 0)
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+ // The element size is non-zero; divide the offset by the element
+ // size (rounding down) to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
+ Ty = ATy->getElementType();
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is at least as safe as the StructLayout API because we
+ // know the offset is within the struct at this point.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return 0;
+
+ // Create a GEP.
+ Constant *C =
+ ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ C = FoldBitCast(C, ResultTy, *TD);
+
+ return C;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
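+///
+/// A minimal caller-side sketch (identifiers illustrative):
+///   if (Constant *C = ConstantFoldInstruction(I, TD)) {
+///     I->replaceAllUsesWith(C);
+///     I->eraseFromParent();
+///   }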
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+ // Handle PHI nodes quickly here...
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Constant *CommonValue = 0;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is undef then skip it. Note that while we could
+ // skip the value if it is equal to the phi node itself we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, or is a different constant to
+ // the one we saw previously, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C || (CommonValue && C != CommonValue))
+ return 0;
+ CommonValue = C;
+ }
+
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
+ }
+
+ // Scan the operand list, checking to see if they are all constants; if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Constant *Op = dyn_cast<Constant>(*i))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return ConstantFoldLoadInst(LI, TD);
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->getIndices());
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->getIndices());
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// returned; if not, null is returned.
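+///
+/// For example, with TargetData available, the classic "sizeof(i32)" idiom
+///   ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)
+/// folds down to the integer constant i64 4 (an illustrative sketch).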
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const TargetData *TD) {
+ SmallVector<Constant*, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+ i != e; ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+ NewC = ConstantFoldConstantExpression(NewCE, TD);
+ Ops.push_back(NewC);
+ }
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
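+///
+/// Illustrative caller sketch (identifiers such as I32Ty and TD are assumed
+/// to be in scope):
+///   Constant *Ops[] = { ConstantInt::get(I32Ty, 2),
+///                       ConstantInt::get(I32Ty, 3) };
+///   Constant *Five =
+///     ConstantFoldInstOperands(Instruction::Add, I32Ty, Ops, 2, TD);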
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+ Constant* const* Ops, unsigned NumOps,
+ const TargetData *TD) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ return C;
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp: assert(0 && "Invalid for compares");
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops, NumOps - 1);
+ return 0;
+ case Instruction::PtrToInt:
+ // If the input is an inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
+ if (TD->getPointerSizeInBits() < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
+ TD->getPointerSizeInBits()));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size. This requires knowing the width of a
+ // pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (TD &&
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ if (TD)
+ return FoldBitCast(Ops[0], DestTy, *TD);
+ return ConstantExpr::getBitCast(Ops[0], DestTy);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+ return C;
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+ }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
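+///
+/// For instance, with TargetData available, comparing
+///   inttoptr (i64 5 to i8*)
+/// against an i8* null for equality reduces to the i1 constant false
+/// (an illustrative sketch).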
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant *Ops0, Constant *Ops1,
+ const TargetData *TD) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+ // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+
+ // Only do this transformation if the integer has the same width as
+ // IntPtrTy; otherwise there is a truncation or extension we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
+ if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ }
+
+ // Only do this transformation if the integer has the same width as
+ // IntPtrTy; otherwise there is a truncation or extension we aren't modeling.
+ if ((CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD);
+ }
+ }
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ Constant *RHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+ }
+ }
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
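+///
+/// Sketch of the intended use: given
+///   @G = constant [3 x i32] [i32 10, i32 20, i32 30]
+/// and the expression getelementptr ([3 x i32]* @G, i32 0, i32 1), passing
+/// @G's initializer as C returns the i32 constant 20.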
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ return 0; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing...
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ for (++I; I != E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+ assert(CU->getZExtValue() < STy->getNumElements() &&
+ "Struct index out of range!");
+ unsigned El = (unsigned)CU->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ C = CS->getOperand(El);
+ } else if (isa<ConstantAggregateZero>(C)) {
+ C = Constant::getNullValue(STy->getElementType(El));
+ } else if (isa<UndefValue>(C)) {
+ C = UndefValue::get(STy->getElementType(El));
+ } else {
+ return 0;
+ }
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return 0;
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ C = CA->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(ATy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(ATy->getElementType());
+ else
+ return 0;
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) {
+ if (CI->getZExtValue() >= VTy->getNumElements())
+ return 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+ C = CP->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(VTy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(VTy->getElementType());
+ else
+ return 0;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ return true;
+ default:
+ return false;
+ case 0: break;
+ }
+
+ if (!F->hasName()) return false;
+ StringRef Name = F->getName();
+
+ // In these cases, the check of the length is required. We don't want to
+ // return true for a name like "cos\0blah", which strcmp would report as
+ // equal to "cos" but which has length 8.
+ switch (Name[0]) {
+ default: return false;
+ case 'a':
+ return Name == "acos" || Name == "asin" ||
+ Name == "atan" || Name == "atan2";
+ case 'c':
+ return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ case 'e':
+ return Name == "exp" || Name == "exp2";
+ case 'f':
+ return Name == "fabs" || Name == "fmod" || Name == "floor";
+ case 'l':
+ return Name == "log" || Name == "log10";
+ case 'p':
+ return Name == "pow";
+ case 's':
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sqrtf";
+ case 't':
+ return Name == "tan" || Name == "tanh";
+ }
+}
+
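+/// ConstantFoldFP - Evaluate NativeFP on V using the host's libm, giving up
+/// (returning null) if the call raises a floating-point exception, and wrap
+/// the result in a ConstantFP of type Ty (float or double).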
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W, const Type *Ty) {
+ sys::llvm_fenv_clearexcept();
+ V = NativeFP(V, W);
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
+ return 0;
+ }
+
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to
+/// integer conversion of a constant floating point. If roundTowardZero is
+/// false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+ const Type *Ty) {
+ assert(Op && "Called with NULL operand");
+ APFloat Val(Op->getValueAPF());
+
+ // All of these conversion intrinsics form an integer of at most 64 bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
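+///
+/// Two illustrative folds performed here (a sketch): a call to the libm
+/// function cos with the constant argument 0.0 folds to the ConstantFP 1.0,
+/// and llvm.uadd.with.overflow.i32(4294967295, 1) folds to the constant
+/// struct { i32 0, i1 true }.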
+Constant *
+llvm::ConstantFoldCall(Function *F,
+ Constant *const *Operands, unsigned NumOperands) {
+ if (!F->hasName()) return 0;
+ StringRef Name = F->getName();
+
+ const Type *Ty = F->getReturnType();
+ if (NumOperands == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
+
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+ return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+ }
+
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+
+ /// We only fold calls with finite arguments. Folding a NaN or infinity
+ /// argument is likely to abort with an exception anyway, and some host
+ /// libms have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return 0;
+
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly; for all of these functions (float)(f((double)arg)) == f(arg)
+ /// holds. Long double is not supported yet.
+ double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
+ Op->getValueAPF().convertToDouble();
+ switch (Name[0]) {
+ case 'a':
+ if (Name == "acos")
+ return ConstantFoldFP(acos, V, Ty);
+ else if (Name == "asin")
+ return ConstantFoldFP(asin, V, Ty);
+ else if (Name == "atan")
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if (Name == "ceil")
+ return ConstantFoldFP(ceil, V, Ty);
+ else if (Name == "cos")
+ return ConstantFoldFP(cos, V, Ty);
+ else if (Name == "cosh")
+ return ConstantFoldFP(cosh, V, Ty);
+ else if (Name == "cosf")
+ return ConstantFoldFP(cos, V, Ty);
+ break;
+ case 'e':
+ if (Name == "exp")
+ return ConstantFoldFP(exp, V, Ty);
+
+ if (Name == "exp2") {
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ }
+ break;
+ case 'f':
+ if (Name == "fabs")
+ return ConstantFoldFP(fabs, V, Ty);
+ else if (Name == "floor")
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if (Name == "log" && V > 0)
+ return ConstantFoldFP(log, V, Ty);
+ else if (Name == "log10" && V > 0)
+ return ConstantFoldFP(log10, V, Ty);
+ else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+ (Ty->isFloatTy() || Ty->isDoubleTy())) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else // Undefined
+ return Constant::getNullValue(Ty);
+ }
+ break;
+ case 's':
+ if (Name == "sin")
+ return ConstantFoldFP(sin, V, Ty);
+ else if (Name == "sinh")
+ return ConstantFoldFP(sinh, V, Ty);
+ else if (Name == "sqrt" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sqrtf" && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Name == "sinf")
+ return ConstantFoldFP(sin, V, Ty);
+ break;
+ case 't':
+ if (Name == "tan")
+ return ConstantFoldFP(tan, V, Ty);
+ else if (Name == "tanh")
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::cttz:
+ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status =
+ Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(F->getContext(), Val);
+ }
+ default:
+ return 0;
+ }
+ }
+
+ if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ }
+ }
+
+ if (isa<UndefValue>(Operands[0])) {
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return Operands[0];
+ return 0;
+ }
+
+ return 0;
+ }
+
+ if (NumOperands == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ return 0;
+ double Op1V = Ty->isFloatTy() ?
+ (double)Op1->getValueAPF().convertToFloat() :
+ Op1->getValueAPF().convertToDouble();
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return 0;
+
+ double Op2V = Ty->isFloatTy() ?
+ (double)Op2->getValueAPF().convertToFloat():
+ Op2->getValueAPF().convertToDouble();
+
+ if (Name == "pow")
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod")
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if (Name == "atan2")
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
+ }
+ return 0;
+ }
+
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (F->getIntrinsicID()) {
+ default: assert(0 && "Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+ break;
+ }
+ Constant *Ops[] = {
+ ConstantInt::get(F->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
+ }
+ }
+ }
+
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 000000000000..ac5eeeb4706a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,839 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
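+///
+/// A minimal usage sketch (M is the Module being compiled; the file name and
+/// producer string are illustrative):
+///   DIBuilder DIB(M);
+///   DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "my-compiler",
+///                         /*isOptimized=*/false, /*Flags=*/"",
+///                         /*RunTimeVer=*/0);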
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ // Deprecate isMain field.
+ ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+ // Create a named metadata so that it is easier to find the compile unit
+ // in a module.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(TheCU);
+}
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ TheCU
+ };
+ return DIFile(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g. 'char'.
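+///
+/// For instance, the C type 'int' might be described (sketch) as
+///   createBasicType("int", 32, 32, dwarf::DW_ATE_signed);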
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::createReferenceType(DIType RTy) {
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+ TheCU,
+ NULL, // Name
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo, DIDescriptor Context) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+ // A friend relationship is encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createInheritance - Create debugging information entry to establish an
+/// inheritance relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset, unsigned Flags) {
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for an Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, StringRef PropertyName,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ MDString::get(VMContext, PropertyName),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createClassType - Create debugging information entry for a class.
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType DerivedFrom, DIArray Elements,
+ MDNode *VTableHolder, MDNode *TemplateParams) {
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHolder,
+ TemplateParams
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateTypeParameter - Create debugging information for a template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateValueParameter - Create debugging information for a template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createStructType - Create debugging information entry for a struct.
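+///
+/// A sketch for 'struct S { int a; };', assuming DIB, Scope, File and IntTy
+/// are already set up:
+///   DIType A = DIB.createMemberType(Scope, "a", File, 1, 32, 32, 0, 0, IntTy);
+///   Value *Mems[] = { A };
+///   DIType S = DIB.createStructType(Scope, "S", File, 1, 32, 32, 0,
+///                                   DIB.getOrCreateArray(Mems), 0);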
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createUnionType - Create debugging information entry for a union.
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements, unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createSubroutineType - Create subroutine type.
+DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ File,
+ MDString::get(VMContext, ""),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, DIArray Elements) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ return DIType(Node);
+}
+
+/// createArrayType - Create debugging information entry for an array.
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_vector_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with the "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+ NMD->addOperand(T);
+}
+
+/// createUnspecifiedParameter - Create an unspecified parameter type
+/// descriptor for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, DW_TAG_base_type),
+ F.getCompileUnit(),
+ NULL,
+ F
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+ if (Elements.empty()) {
+ Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, Null));
+ }
+ return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+ };
+
+ return DISubrange(MDNode::get(VMContext, Elts));
+}
+
+/// createGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createLocalVariable - Create a new descriptor for the specified local
+/// variable.
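+///
+/// For example (a sketch), a descriptor for the first integer parameter 'x'
+/// of a subprogram SP could be created as
+///   createLocalVariable(dwarf::DW_TAG_arg_variable, SP, "x", File, Line,
+///                       IntTy, /*AlwaysPreserve=*/true, /*Flags=*/0,
+///                       /*ArgNo=*/1);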
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags,
+ unsigned ArgNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (AlwaysPreserve) {
+ // The optimizer may remove local variables. If there is an interest
+ // in preserving variable info in such a situation then stash it in a
+ // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+ FnLocals->addOperand(Node);
+ }
+ return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, ArrayRef<Value *> Addr,
+ unsigned ArgNo) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+ Elts.push_back(Scope);
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
+ Elts.push_back(Ty);
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.append(Addr.begin(), Addr.end());
+
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned Flags, bool isOptimized,
+ Function *Fn,
+ MDNode *TParams,
+ MDNode *Decl) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParams,
+ Decl
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn,
+ MDNode *TParam) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParam,
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// createNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ return DINameSpace(MDNode::get(VMContext, Elts));
+}
+
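+/// createLexicalBlock - Create a descriptor for a lexical block (an explicit
+/// brace scope) nested inside the given parent Scope, starting at the given
+/// line and column.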
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+ // Defeat MDNode uniquing for lexical blocks by using a unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ Scope,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ return DILexicalBlock(MDNode::get(VMContext, Elts));
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
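+///
+/// Typical use (a sketch, identifiers illustrative): after emitting an alloca
+/// for a local variable, pair it with its DIVariable descriptor:
+///   DIB.insertDeclare(Alloca, Var, /*InsertBefore=*/InsertPt);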
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+}
+
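The insertDeclare and insertDbgValueIntrinsic entry points above are what a front end calls once it has a DIVariable in hand. Below is a minimal usage sketch, not part of this change: the helper name and its parameters are illustrative, the header paths follow this tree's layout, and the DIVariable is assumed to have been built elsewhere.

    #include "llvm/BasicBlock.h"
    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Hypothetical helper: emit both debug intrinsics for one local variable.
    // 'Storage' is the variable's stack slot, 'NewVal' a value assigned to it,
    // 'VarDesc' its DIVariable descriptor, 'BB' the block to insert into.
    static void emitDebugIntrinsics(DIBuilder &DIB, Value *Storage, Value *NewVal,
                                    DIVariable VarDesc, BasicBlock *BB) {
      // Describe where the variable lives for its whole lifetime...
      DIB.insertDeclare(Storage, VarDesc, BB);
      // ...and record one value it takes on; Offset 0 covers the whole variable.
      DIB.insertDbgValueIntrinsic(NewVal, /*Offset=*/0, VarDesc, BB);
    }
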
diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 000000000000..b23c3514d0bd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,224 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human readable form -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions and their associated
+// debug info:
+//
+// - source/line/col information
+// - original variable name
+// - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+ cl::desc("Print fullpath when printing debug info"),
+ cl::Hidden);
+
+namespace {
+ class PrintDbgInfo : public FunctionPass {
+ raw_ostream &Out;
+ void printVariableDeclaration(const Value *V);
+ public:
+ static char ID; // Pass identification
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+ initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+ char PrintDbgInfo::ID = 0;
+}
+
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false)
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isGlobalVariable())
+ continue;
+ if (DIGlobalVariable(DIG).getGlobal() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isSubprogram())
+ continue;
+ if (DISubprogram(DIG).getFunction() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+ V = V->stripPointerCasts();
+
+ if (!isa<Instruction>(V) && !isa<Argument>(V))
+ return 0;
+
+ const Function *F = NULL;
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+
+ for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DDI->getAddress() == V)
+ return DDI;
+
+ return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo,
+ std::string &File, std::string &Dir) {
+ DICompileUnit Unit;
+ DIType TypeD;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+ Value *DIGV = findDbgGlobalDeclare(GV);
+ if (!DIGV) return false;
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+ Value *DIF = findDbgSubprogramDeclare(F);
+ if (!DIF) return false;
+ DISubprogram Var(cast<MDNode>(DIF));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else {
+ const DbgDeclareInst *DDI = findDbgDeclare(V);
+ if (!DDI) return false;
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+ StringRef D = Var.getName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ }
+
+ StringRef T = TypeD.getName();
+ if (!T.empty())
+ Type = T;
+ StringRef F = Unit.getFilename();
+ if (!F.empty())
+ File = F;
+ StringRef D = Unit.getDirectory();
+ if (!D.empty())
+ Dir = D;
+ return true;
+}
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+ std::string DisplayName, File, Directory, Type;
+ unsigned LineNo;
+
+ if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+ return;
+
+ Out << "; ";
+ WriteAsOperand(Out, V, false, 0);
+ if (isa<Function>(V))
+ Out << " is function " << DisplayName
+ << " of type " << Type << " declared at ";
+ else
+ Out << " is variable " << DisplayName
+ << " of type " << Type << " declared at ";
+
+ if (PrintDirectory)
+ Out << Directory << "/";
+
+ Out << File << ":" << LineNo << "\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ Out << "function " << F.getName() << "\n\n";
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+ // Skip dead blocks.
+ continue;
+
+ Out << BB->getName();
+ Out << ":";
+
+ Out << "\n";
+
+ for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+ i != e; ++i) {
+
+ printVariableDeclaration(i);
+
+ if (const User *U = dyn_cast<User>(i)) {
+ for (unsigned Op = 0, NumOps = U->getNumOperands(); Op != NumOps; ++Op)
+   printVariableDeclaration(U->getOperand(Op));
+ }
+ }
+ }
+ return false;
+}
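The printer only does something once it is scheduled. A sketch of driving it over a module follows; the driver function is hypothetical, and createDbgInfoPrinterPass() is assumed to be declared in the llvm/Analysis/Passes.h header this file already includes.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Analysis/Passes.h"

    // Hypothetical driver: run -print-dbginfo over every function in M.
    // PrintDbgInfo writes to errs(), so no stream needs to be wired up here.
    static void printAllDebugInfo(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createDbgInfoPrinterPass());
      PM.run(M);
    }
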
diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 000000000000..b42e946f2ffa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,948 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
+DIDescriptor::getStringField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return StringRef();
+
+ if (Elt < DbgNode->getNumOperands())
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
+ return MDS->getString();
+
+ return StringRef();
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getZExtValue();
+
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return DIDescriptor();
+
+ if (Elt < DbgNode->getNumOperands())
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+ if (getVersion() <= llvm::LLVMDebugVersion8)
+ return DbgNode->getNumOperands()-6;
+ return DbgNode->getNumOperands()-7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
+ return true;
+ default:
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ case dwarf::DW_TAG_return_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isUnspecifiedParameter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the
+/// scope-related tags.
+bool DIDescriptor::isScope() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_namespace:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ if (!DbgNode)
+ return 0;
+ return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
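These replaceAllUsesWith overloads are the resolution half of the forward-declaration pattern: a temporary node stands in for a type until its definition is finished, then every user is pointed at the real node and the temporary is deleted, as the code above does. A sketch of that pattern follows, under the assumption that MDNode::getTemporary is the counterpart of the deleteTemporary call used here and that the placeholder operands carry a valid type tag, as the DIType constructor above requires.

    #include "llvm/Metadata.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Sketch only: 'PlaceholderElts' stands in for whatever operand layout the
    // producer uses for a forward-declared composite type.
    static DIType startForwardDecl(LLVMContext &Ctx,
                                   ArrayRef<Value*> PlaceholderElts) {
      // Hand out a temporary node that other descriptors may start referencing.
      return DIType(MDNode::getTemporary(Ctx, PlaceholderElts));
    }

    static void finishForwardDecl(DIType FwdDecl, MDNode *RealDefinition) {
      // Point every user of the placeholder at the finished definition; the
      // temporary node itself is deleted by replaceAllUsesWith() above.
      FwdDecl.replaceAllUsesWith(RealDefinition);
    }
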
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (!DbgNode)
+ return false;
+ StringRef N = getFilename();
+ if (N.empty())
+ return false;
+ // It is possible that the directory and producer strings are empty.
+ return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+ && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+ && Tag != dwarf::DW_TAG_enumeration_type
+ && getFilename().empty())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (getDisplayName().empty())
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal() && !getConstant())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ if (!getContext().Verify())
+ return false;
+
+ if (!getCompileUnit().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (getName().empty())
+ return false;
+ if (!getCompileUnit().Verify())
+ return false;
+ return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ unsigned Tag = getTag();
+ if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ DIType BaseType = getTypeDerivedFrom();
+ // If this type is not derived from any type then take a conservative
+ // approach.
+ if (!BaseType.isValid())
+ return getSizeInBits();
+ if (BaseType.isDerivedType())
+ return DIDerivedType(BaseType).getOriginalTypeSize();
+ else
+ return BaseType.getSizeInBits();
+ }
+
+ return getSizeInBits();
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function argument.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+ // This variable is not an inlined function argument if its scope
+ // does not describe the current function.
+ return !(DISubprogram(getContext()).describes(CurFn));
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ if (F->getName() == Name)
+ return true;
+ return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+ assert (DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 16)
+ return getUnsignedField(15);
+ return 0;
+}
+
+StringRef DIScope::getFilename() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getFilename();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getFilename();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getFilename();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getFilename();
+ if (isType())
+ return DIType(DbgNode).getFilename();
+ if (isFile())
+ return DIFile(DbgNode).getFilename();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+StringRef DIScope::getDirectory() const {
+ if (!DbgNode)
+ return StringRef();
+ if (isLexicalBlock())
+ return DILexicalBlock(DbgNode).getDirectory();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getDirectory();
+ if (isCompileUnit())
+ return DICompileUnit(DbgNode).getDirectory();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getDirectory();
+ if (isType())
+ return DIType(DbgNode).getDirectory();
+ if (isFile())
+ return DIFile(DbgNode).getDirectory();
+ assert(0 && "Invalid DIScope!");
+ return StringRef();
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ OS << "[" << dwarf::TagString(getTag()) << "] ";
+ OS.write_hex((intptr_t) &*DbgNode) << ']';
+}
+
+/// print - Print compile unit.
+void DICompileUnit::print(raw_ostream &OS) const {
+ if (getLanguage())
+ OS << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+
+ OS << " [" << getDirectory() << "/" << getFilename() << "]";
+}
+
+/// print - Print type.
+void DIType::print(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " ["
+ << "line " << getLineNumber() << ", "
+ << getSizeInBits() << " bits, "
+ << getAlignInBits() << " bit alignment, "
+ << getOffsetInBits() << " bit offset"
+ << "] ";
+
+ if (isPrivate())
+ OS << " [private] ";
+ else if (isProtected())
+ OS << " [protected] ";
+
+ if (isForwardDecl())
+ OS << " [fwd] ";
+
+ if (isBasicType())
+ DIBasicType(DbgNode).print(OS);
+ else if (isDerivedType())
+ DIDerivedType(DbgNode).print(OS);
+ else if (isCompositeType())
+ DICompositeType(DbgNode).print(OS);
+ else {
+ OS << "Invalid DIType\n";
+ return;
+ }
+
+ OS << "\n";
+}
+
+/// print - Print basic type.
+void DIBasicType::print(raw_ostream &OS) const {
+ OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+}
+
+/// print - Print derived type.
+void DIDerivedType::print(raw_ostream &OS) const {
+ OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
+}
+
+/// print - Print composite type.
+void DICompositeType::print(raw_ostream &OS) const {
+ DIArray A = getTypeArray();
+ OS << " [" << A.getNumElements() << " elements]";
+}
+
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ OS << "\n";
+}
+
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+ OS << " [";
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ OS << " [" << dwarf::TagString(Tag) << "] ";
+
+ // TODO : Print context
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ OS << " [local] ";
+
+ if (isDefinition())
+ OS << " [def] ";
+
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).print(OS);
+ OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+ getType().print(OS);
+ OS << "\n";
+
+ // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// fixupObjcLikeName - Replace the special characters used in typical
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+ bool isObjCLike = false;
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[')
+ isObjCLike = true;
+
+ if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+ C == '+' || C == '(' || C == ')'))
+ Out.push_back('.');
+ else
+ Out.push_back(C);
+ }
+}
+
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function-specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getNamedMetadata(Name.str());
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function-specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getOrInsertNamedMetadata(Name.str());
+}
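A usage sketch for these lookups, assuming the declarations live in llvm/Analysis/DebugInfo.h as included above: the per-function list of local-variable descriptors is a named metadata node keyed on the function name, with Objective-C punctuation folded to '.' by fixupObjcLikeName before the lookup. The helper below is hypothetical.

    #include "llvm/Function.h"
    #include "llvm/Module.h"
    #include "llvm/Analysis/DebugInfo.h"

    // Hypothetical: fetch, creating it if needed, the "llvm.dbg.lv.<name>" node
    // that will hold the local-variable descriptors for F.
    static llvm::NamedMDNode *localVariableListFor(llvm::Module &M,
                                                   const llvm::Function &F) {
      return llvm::getOrInsertFnSpecificMDNode(M, F.getName());
    }
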
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+ ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ processDeclare(DDI);
+
+ DebugLoc Loc = BI->getDebugLoc();
+ if (Loc.isUnknown())
+ continue;
+
+ LLVMContext &Ctx = BI->getContext();
+ DIDescriptor Scope(Loc.getScope(Ctx));
+
+ if (Scope.isCompileUnit())
+ addCompileUnit(DICompileUnit(Scope));
+ else if (Scope.isSubprogram())
+ processSubprogram(DISubprogram(Scope));
+ else if (Scope.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(Scope));
+
+ if (MDNode *IA = Loc.getInlinedAt(Ctx))
+ processLocation(DILocation(IA));
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
+ }
+ }
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ processSubprogram(DISubprogram(NMD->getOperand(i)));
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (!Loc.Verify()) return;
+ DIDescriptor S(Loc.getScope());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S));
+ processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+
+ addCompileUnit(DT.getCompileUnit());
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT);
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ if (D.isType())
+ processType(DIType(D));
+ else if (D.isSubprogram())
+ processSubprogram(DISubprogram(D));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT);
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context));
+ else
+ return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (!addSubprogram(SP))
+ return;
+ addCompileUnit(SP.getCompileUnit());
+ processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+ MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ if (!N) return;
+
+ DIDescriptor DV(N);
+ if (!DV.isVariable())
+ return;
+
+ if (!NodesSeen.insert(DV))
+ return;
+
+ addCompileUnit(DIVariable(N).getCompileUnit());
+ processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (!DT.isValid())
+ return false;
+
+ if (!NodesSeen.insert(DT))
+ return false;
+
+ TYs.push_back(DT);
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (!CU.Verify())
+ return false;
+
+ if (!NodesSeen.insert(CU))
+ return false;
+
+ CUs.push_back(CU);
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (!DIDescriptor(DIG).isGlobalVariable())
+ return false;
+
+ if (!NodesSeen.insert(DIG))
+ return false;
+
+ GVs.push_back(DIG);
+ return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (!DIDescriptor(SP).isSubprogram())
+ return false;
+
+ if (!NodesSeen.insert(SP))
+ return false;
+
+ SPs.push_back(SP);
+ return true;
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+ return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+ if (T.isCompositeType())
+ return DICompositeType(T);
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+ return DICompositeType();
+}
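getDISubprogram() above is the usual way to climb from a debug location's scope back to the enclosing function descriptor. A sketch of a caller follows; the helper itself is hypothetical and the header paths follow this tree's layout.

    #include "llvm/Instruction.h"
    #include "llvm/Analysis/DebugInfo.h"

    // Sketch: recover the subprogram enclosing an instruction's debug location.
    // Returns a null descriptor when the instruction carries no location.
    static llvm::DISubprogram subprogramFor(const llvm::Instruction &I) {
      llvm::DebugLoc Loc = I.getDebugLoc();
      if (Loc.isUnknown())
        return llvm::DISubprogram();
      return llvm::getDISubprogram(Loc.getScope(I.getContext()));
    }
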
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 000000000000..cde431459d50
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,232 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only', as in '-dot-dom-only', only the
+// names of the basic blocks are printed and their contents are hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+}
+
+namespace {
+struct DomViewer
+ : public DOTGraphTraitsViewer<DominatorTree, false> {
+ static char ID;
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyViewer
+ : public DOTGraphTraitsViewer<DominatorTree, true> {
+ static char ID;
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+ static char ID;
+ PostDomViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false)
+
+namespace {
+struct DomPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, false> {
+ static char ID;
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyPrinter
+ : public DOTGraphTraitsPrinter<DominatorTree, true> {
+ static char ID;
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+ static char ID;
+ PostDomPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+// Create methods available outside of this file, so they can be used from
+// "include/llvm/LinkAllPasses.h". Otherwise the passes would be deleted by
+// link-time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+ return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+ return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+ return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+ return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+ return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+ return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+ return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+ return new PostDomOnlyViewer();
+}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 000000000000..6de4e1e1d7de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+ // If no unvisited child was pushed (i.e. all children have already been
+ // visited), then pop this block from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " DomFrontier for BB ";
+ if (I->first)
+ WriteAsOperand(OS, I->first, false);
+ else
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I) {
+ OS << ' ';
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << "<<exit node>>";
+ }
+ OS << "\n";
+ }
+}
+
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
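calculate() fills the Frontiers map that print() walks above; a consumer reads the result through the same iterator interface. A sketch follows, assuming the DominanceFrontier reference comes from getAnalysis<DominanceFrontier>() inside a FunctionPass, which is not shown.

    #include <set>
    #include "llvm/BasicBlock.h"
    #include "llvm/Analysis/DominanceFrontier.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: list each block's dominance frontier, mirroring the iteration in
    // DominanceFrontierBase::print() above.
    static void listFrontiers(const llvm::DominanceFrontier &DF,
                              llvm::raw_ostream &OS) {
      for (llvm::DominanceFrontier::const_iterator I = DF.begin(), E = DF.end();
           I != E; ++I) {
        OS << I->first->getName() << " ->";
        const std::set<llvm::BasicBlock*> &Frontier = I->second;
        for (std::set<llvm::BasicBlock*>::const_iterator BI = Frontier.begin(),
               BE = Frontier.end(); BI != BE; ++BI)
          OS << ' ' << (*BI)->getName();
        OS << '\n';
      }
    }
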
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 000000000000..2e79eab51ff7
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,340 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class BasicCallGraph : public ModulePass, public CallGraph {
+ // Root is the root of the call graph, or the external node if a 'main'
+ // function couldn't be found.
+ //
+ CallGraphNode *Root;
+
+ // ExternalCallingNode - This node has edges to all external functions and
+ // those internal functions that have their address taken.
+ CallGraphNode *ExternalCallingNode;
+
+ // CallsExternalNode - This node has edges to it from all functions making
+ // indirect calls or calling an external function.
+ CallGraphNode *CallsExternalNode;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ BasicCallGraph() : ModulePass(ID), Root(0),
+ ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnModule - Compute the call graph for the specified module.
+ virtual bool runOnModule(Module &M) {
+ CallGraph::initialize(M);
+
+ ExternalCallingNode = getOrInsertFunction(0);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
+
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
+
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0) Root = ExternalCallingNode;
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual void print(raw_ostream &OS, const Module *) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = getRoot()->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << getRoot() << ">>\n";
+ }
+
+ CallGraph::print(OS, 0);
+ }
+
+ virtual void releaseMemory() {
+ destroy();
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it should
+ /// override this to adjust the this pointer as needed for the specified pass
+ /// info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
+ return (CallGraph*)this;
+ return this;
+ }
+
+ CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+ CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+ // getRoot - Return the root of the call graph, which is either main, or if
+ // main cannot be found, the external node.
+ //
+ CallGraphNode *getRoot() { return Root; }
+ const CallGraphNode *getRoot() const { return Root; }
+
+private:
+ //===---------------------------------------------------------------------
+ // Implementation of CallGraph construction
+ //
+
+ // addToCallGraph - Add a function to the call graph, and link the node to all
+ // of the functions that it calls.
+ //
+ void addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // Loop over all of the users of the function, looking for non-call uses.
+ for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
+ // Not a call, or being used as a parameter rather than as the callee.
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ break;
+ }
+ }
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS && !isa<IntrinsicInst>(II)) {
+ const Function *Callee = CS.getCalledFunction();
+ if (Callee)
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ else
+ Node->addCalledFunction(CS, CallsExternalNode);
+ }
+ }
+ }
+
+ //
+ // destroy - Release memory for the call graph
+ virtual void destroy() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+ CallGraph::destroy();
+ }
+};
+
+} //End anonymous namespace
+
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true)
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+ Mod = &M;
+}
+
+void CallGraph::destroy() {
+ if (FunctionMap.empty()) return;
+
+ // Reset all nodes' use counts to zero before deleting them to prevent an
+ // assertion from firing.
+#ifndef NDEBUG
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ I->second->allReferencesDropped();
+#endif
+
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+ for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+ I->second->print(OS);
+}
+void CallGraph::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ delete CGN; // Delete the call graph node for this func
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ Mod->getFunctionList().remove(F);
+ return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = I->second;
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ CallGraphNode *&CGN = FunctionMap[F];
+ if (CGN) return CGN;
+
+ assert((!F || F->getParent() == Mod) && "Function not in current module!");
+ return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(raw_ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() << "'";
+ else
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
+ if (Function *FI = I->second->getFunction())
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
+}
+
+void CallGraphNode::dump() const { print(dbgs()); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && CR.first == 0) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
+ return;
+ }
+ }
+}
+
+// Ensure that users of CallGraph.h also link with this file.
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 000000000000..659ffab0c6f0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,608 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// that are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call graph in SCC order: that is, they process functions bottom-up, except
+// for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+ static char ID;
+ explicit CGPassManager(int Depth)
+ : ModulePass(ID), PMDataManager(Depth) { }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraph>();
+ Info.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "CallGraph Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+
+private:
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
+
+char CGPassManager::ID = 0;
+
+
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+ PMDataManager *PM = P->getAsPMDataManager();
+
+ if (PM == 0) {
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ if (!CallGraphUpToDate) {
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ FPPassManager *FPP = (FPPassManager*)P;
+
+ // Run pass P on all functions in the current SCC.
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (Function *F = (*I)->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN simplifies away the code that fed the indirect
+/// call. This never happens in checking mode.
+///
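+/// A hedged illustration (the IR below is invented for exposition, not taken
+/// from any test): if a function pass rewrites
+///
+///     %fp = load void ()** @slot
+///     call void %fp()
+///
+/// into the direct call "call void @impl()", the edge for that call site
+/// still points at the calls-external node; the code below retargets it to
+/// @impl's node and reports the devirtualization to the caller.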
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ );
+
+ bool MadeChange = false;
+ bool DevirtualizedCall = false;
+
+ // Scan all functions in the SCC.
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
+ Function *F = CGN->getFunction();
+ if (F == 0 || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (I->first == 0 ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, then the function
+ // pass RAUW'd a call with another value. This can happen when, for
+ // example, a call to a well-known function is constant folded away.
+ !CallSite(I->first)) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If this was an indirect call site, count it.
+ if (I->second->getFunction() == 0)
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
+ // Just remove the edge from the set of callees, keeping track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+ CallSites.insert(std::make_pair(I->first, I->second));
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<IntrinsicInst>(I)) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == 0)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (ExistingNode->getFunction() == 0) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ }
+
+ // Update the edge target in CGN.
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ ++NumDirectAdded;
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+ // number of direct calls is increased. There are tons of ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
+ // After scanning this function, if we still have entries in CallSites, then
+ // they are dangling pointers. WeakVH should save us from this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large SCCs.
+ if ((FunctionNo & 15) == 15)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I)
+ (*I)->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+
+ return DevirtualizedCall;
+}
+
+/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
+/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+ // up-to-date or not. The CGSCC pass manager runs two types of passes:
+ // CallGraphSCC passes and ordinary function passes. Because ordinary
+ // function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list;
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // functionpass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool Changed = doInitialization(CG);
+
+ // Walk the callgraph in bottom-up SCC order.
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ std::vector<CallGraphNode*> &NodeVec = *CGI;
+ CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+ ++CGI;
+
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+ // iteration count limit in case there is crazy code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
+
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
+
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
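+
+// A hedged note on typical usage (the command below is illustrative): the
+// per-SCC revisit cap declared above as MaxIterations can be raised from the
+// opt command line, e.g.
+//
+//   opt -O2 -max-cg-scc-iterations=8 input.bc -o output.bc
+//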
+
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG);
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG);
+ }
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP;
+
+ if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
+ CGP = (CGPassManager*)PMS.top();
+ else {
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = CGP;
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ AU.addPreserved<CallGraph>();
+}
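+
+// A minimal sketch of what the comment above asks of subclasses
+// ("MyCGSCCPass" is a hypothetical pass name, not part of this file):
+//
+//   void MyCGSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequired<AliasAnalysis>();        // pass-specific requirements
+//     CallGraphSCCPass::getAnalysisUsage(AU); // keep CallGraph required/preserved
+//   }
+//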
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) {
+ Out << Banner;
+ // The SCC containing the external node has no Function; guard against
+ // dereferencing a null function pointer.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ F->print(Out);
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 000000000000..6535786668bc
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,101 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true)
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(const Type *Ty) {
+ // If Ty doesn't already exist in the used types set, add it now; otherwise
+ // return.
+ if (!UsedTypes.insert(Ty)) return; // Already contains Ty.
+
+ // Make sure to add any types this type references now.
+ //
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+ IncorporateType(V->getType());
+
+ // If this is a constant, it could be using other types...
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (!isa<GlobalValue>(C))
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+ UsedTypes.clear(); // reset if run multiple times...
+
+ // Loop over global variables, incorporating their types
+ for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+ I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+ IncorporateType(MI->getType());
+ const Function &F = *MI;
+
+ // Loop over all of the instructions in the function, adding their return
+ // type as well as the types of their operands.
+ //
+ for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+ II != IE; ++II) {
+ const Instruction &I = *II;
+
+ IncorporateType(I.getType()); // Incorporate the type of the instruction
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI); // Insert inst operand types as well
+ }
+ }
+
+ return false;
+}
+
+// Print the types found in the module. If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+ OS << "Types in use by this module:\n";
+ for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
+ E = UsedTypes.end(); I != E; ++I) {
+ OS << " " << **I << '\n';
+ }
+}
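+
+// A hedged usage sketch: as an analysis pass this is normally exercised from
+// the command line, e.g.
+//
+//   opt -analyze -print-used-types input.bc
+//
+// which runs runOnModule above and then prints via the print method.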
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 000000000000..b226d66cd78a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,609 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+ /// FunctionRecord - One instance of this structure is stored for every
+ /// function in the program. Later, the entries for these functions are
+ /// removed if the function is found to call an external function (in which
+ /// case we know nothing about it).
+ struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
+
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
+
+ FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+ };
+
+ /// GlobalsModRef - The actual analysis pass.
+ class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - Globals for which the pointed-to memory is known to
+ /// be 'owned' by the global itself.
+ std::set<const GlobalValue*> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function*, FunctionRecord> FunctionInfo;
+
+ public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+ AnalyzeGlobals(M); // find non-addr taken globals
+ AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraph>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Location &LocA, const Location &LocB);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
+
+ /// getModRefBehavior - Return the most generic mod/ref behavior known for
+ /// the specified function, independent of any particular call site.
+ ModRefBehavior getModRefBehavior(const Function *F) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (const Function* F = CS.getCalledFunction())
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
+
+ virtual void deleteValue(Value *V);
+ virtual void copyValue(Value *From, Value *To);
+ virtual void addEscapingUse(Use &U);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return 0;
+ }
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest = 0);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+ };
+}
+
+char GlobalsModRef::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
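+
+// A hedged usage sketch: this analysis is typically scheduled in front of the
+// passes that query it, e.g. from the command line
+//
+//   opt -globalsmodref-aa -gvn input.bc -o output.bc
+//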
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValues in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) {
+ std::vector<Function*> Readers, Writers;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(I);
+ ++NumNonAddrTakenFunctions;
+ }
+ Readers.clear(); Writers.clear();
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(I);
+
+ for (unsigned i = 0, e = Readers.size(); i != e; ++i)
+ FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
+
+ if (!I->isConstant()) // No need to keep track of writers to constants
+ for (unsigned i = 0, e = Writers.size(); i != e; ++i)
+ FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (I->getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(I))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear(); Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+ std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy()) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ Readers.push_back(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (V == SI->getOperand(1)) {
+ Writers.push_back(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (isFreeCall(U)) {
+ Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (CI->getArgOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+ if (II->getArgOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
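+
+// A hedged C-level illustration (the global @G is invented): if every use of
+// a static "int G;" is a plain "G = x;" or "y = G;", each IR use of @G is a
+// direct load or store, so the walk above returns false and the caller
+// records @G as non-address-taken together with its reader/writer functions.
+// A single "p = &G;" stores the pointer itself and makes the walk return true.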
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+ // Keep track of values related to the allocation of the memory, f.e. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value*> AllocRelatedValues;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading from/storing to it. We do *not*
+ // allow storing the loaded pointer somewhere else or passing it to a
+ // function.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV) return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+
+ if (isMalloc(Ptr)) {
+ // Okay, easy case.
+ } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
+ Function *F = CI->getCalledFunction();
+ if (!F || !F->isDeclaration()) return false; // Too hard to analyze.
+ if (F->getName() != "calloc") return false; // Not calloc.
+ } else {
+ return false; // Too hard to analyze.
+ }
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ return true;
+}
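+
+// A hedged illustration of the pattern recognized above (the C names are
+// invented):
+//
+//   static int *Buf;
+//   void init(void) { Buf = malloc(100 * sizeof(int)); }
+//   int  get(int i) { return Buf[i]; }
+//
+// Every store to @Buf stores a fresh allocation and every load of @Buf is
+// only dereferenced, so @Buf ends up in IndirectGlobals and the malloc result
+// in AllocsForIndirectGlobals.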
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
+ ++I) {
+ std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction()) {
+ // Calls externally - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
+
+ bool KnowNothing = false;
+ unsigned FunctionEffect = 0;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behavior from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FunctionEffect |= Ref;
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FR.MayReadAnyGlobal = true;
+ } else {
+ FunctionEffect |= ModRef;
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FunctionEffect |= CalleeFR->FunctionEffect;
+
+ // Incorporate callee's effects on globals into our info.
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
+ CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
+ GI != E; ++GI)
+ FR.GlobalInfo[GI->first] |= GI->second;
+ FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfo map.
+ if (KnowNothing) {
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
+ E = inst_end(SCC[i]->getFunction());
+ II != E && FunctionEffect != ModRef; ++II)
+ if (isa<LoadInst>(*II)) {
+ FunctionEffect |= Ref;
+ if (cast<LoadInst>(*II).isVolatile())
+ // Volatile loads may have side-effects, so mark them as writing
+ // memory (for example, a flag inside the processor).
+ FunctionEffect |= Mod;
+ } else if (isa<StoreInst>(*II)) {
+ FunctionEffect |= Mod;
+ if (cast<StoreInst>(*II).isVolatile())
+ // Treat volatile stores as reading memory somewhere.
+ FunctionEffect |= Ref;
+ } else if (isMalloc(&cast<Instruction>(*II)) ||
+ isFreeCall(&cast<Instruction>(*II))) {
+ FunctionEffect |= ModRef;
+ }
+
+ if ((FunctionEffect & Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FunctionEffect == 0)
+ ++NumNoMemFunctions;
+ FR.FunctionEffect = FunctionEffect;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfo[SCC[i]->getFunction()] = FR;
+ }
+}
+
+
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+GlobalsModRef::alias(const Location &LocA,
+ const Location &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals, or if one is and the other isn't, we know these can't alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = 0;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (AllocsForIndirectGlobals.count(UV1))
+ GV1 = AllocsForIndirectGlobals[UV1];
+ if (AllocsForIndirectGlobals.count(UV2))
+ GV2 = AllocsForIndirectGlobals[UV2];
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If either pointer is based on an
+ // indirect global and if they are not both based on the same indirect global,
+ // they cannot alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ unsigned Known = ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
+ Known = FR->getInfoForGlobal(GV);
+
+ if (Known == NoModRef)
+ return NoModRef; // No need to query other mod/ref analyses
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and remove
+ // any AllocRelatedValues for it.
+ if (IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (std::map<const Value*, const GlobalValue*>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end(); I != E; ) {
+ if (I->second == GV) {
+ AllocsForIndirectGlobals.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise, if this is an allocation related to an indirect global, remove
+ // it.
+ AllocsForIndirectGlobals.erase(V);
+
+ AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+ AliasAnalysis::copyValue(From, To);
+}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+ // For the purposes of this analysis, it is conservatively correct to treat
+ // a newly escaping value equivalently to a deleted one. We could perhaps
+ // be more precise by processing the new use and attempting to update our
+ // saved analysis results to accommodate it.
+ deleteValue(U);
+
+ AliasAnalysis::addEscapingUse(U);
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..0ba2e04c6302
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+ initializeBasicCallGraphPass(Registry);
+ initializeCallGraphAnalysisGroup(Registry);
+ initializeFindUsedTypesPass(Registry);
+ initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeIPA(*unwrap(R));
+}
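+
+// A hedged C-API sketch (client-side code, not part of this file): bindings
+// users would typically do
+//
+//   LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
+//   LLVMInitializeIPA(R);
+//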
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
new file mode 100644
index 000000000000..e5f0a77ab67d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,272 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE, LoopInfo *LI) {
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides unless they're
+ // only used outside the loop and we can simplify them.
+ if (AR->getLoop() == L)
+ return AR->isAffine() ||
+ (!L->contains(I) &&
+ SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
+ // Otherwise recurse to see if the start value is interesting, and that
+ // the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE, LI) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
+ }
+
+ // An add is interesting if exactly one of its operands is interesting.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L, SE, LI)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
+ }
+
+ // Nothing else is interesting here.
+ return false;
+}
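+
+// A hedged example (the loop and types are invented): in
+//   for (i = 0; i != n; ++i) use(A[i]);
+// the pointer SCEV {@A,+,4}<%loop> is an affine addrec and thus interesting.
+// An add such as ({@A,+,4} + %off) with exactly one interesting operand is
+// also interesting, while ({@A,+,4} + {@B,+,4}) is rejected by the
+// "exactly one" rule above.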
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUses list and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // LSR is not APInt clean; do not touch integers bigger than 64 bits.
+ // Also avoid creating IVs of non-native types. For example, we don't want a
+ // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+ uint64_t Width = SE->getTypeSizeInBits(I->getType());
+ if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
+ return false;
+
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *ISE = SE->getSCEV(I);
+
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L, SE, LI))
+ return false;
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // the choices that depend on addressing-mode use right, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but we do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+ DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ // Okay, we found a user that we cannot reduce.
+ IVUses.push_back(new IVStrideUse(this, User, I));
+ IVStrideUse &NewUse = IVUses.back();
+ // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+ // The regular return value here is discarded; instead of recording
+ // it, we just recompute it when we need it.
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+ DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ }
+ }
+ return true;
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
+ return IVUses.back();
+}
+
+IVUsers::IVUsers()
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ (void)AddUsersIfInteresting(I);
+
+ return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ WriteAsOperand(OS, (*I)->getHeader(), false);
+ OS << ")";
+ }
+ OS << " in ";
+ UI->getUser()->print(OS);
+ OS << '\n';
+ }
+}
+
+void IVUsers::dump() const {
+ print(dbgs());
+}
+
+void IVUsers::releaseMemory() {
+ Processed.clear();
+ IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return 0;
+ }
+
+ return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return 0;
+}
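+
+// Illustrative sketch (assumed client code, not part of this file): a pass
+// holding an IVUsers pointer "IU" for the current loop "L" could walk the
+// recorded uses and query each stride roughly like this:
+//
+//   for (IVUsers::const_iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI)
+//     if (const SCEV *Stride = IU->getStride(*UI, L))
+//       dbgs() << "stride: " << *Stride << '\n';
+//
+// "IU" and "L" are invented names, and IVUsers::const_iterator with begin()
+// and end() are assumed to come from the corresponding header; getStride
+// returns null when no add recurrence for L is found, so callers must check.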
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->IVUses.erase(this);
+ // this now dangles!
+}
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
new file mode 100644
index 000000000000..efde5984c115
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,647 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small, return true.
+/// TODO: Perhaps handle calls like memcpy, strcpy, etc.?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to the function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ // The result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical instructions, or return instructions). These
+ // extensions are usually no-ops on most targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ continue;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one would be incorrect because all of the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function, so
+ // the indirect branch would jump from the inlined copy of the function into
+ // the original function, which is undefined behavior.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+ // We will be able to eliminate all but one of the successors.
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
+ const unsigned NumSucc = TI.getNumSuccessors();
+ unsigned Instrs = 0;
+ for (unsigned I = 0; I != NumSucc; ++I)
+ Instrs += NumBBInsts[TI.getSuccessor(I)];
+ // We don't know which blocks will be eliminated, so use the average size.
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += InlineConstants::InstrCost;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+ }
+ return Reduction;
+}
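+
+// Worked example (illustrative, not from the original source): if V feeds a
+// conditional branch whose two successors contain 4 and 6 counted
+// instructions, then with InlineConstants::InstrCost == 5 the branch case
+// above adds 5 * (4 + 6) * (2 - 1) / 2 = 25 to Reduction - the cost of the
+// one average-sized successor we expect to delete once V becomes constant.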
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += InlineConstants::InstrCost;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Reduction += CountCodeReductionForAlloca(BCI);
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void CodeMetrics::analyzeFunction(Function *F) {
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (F->callsFunctionThatReturnsTwice())
+ callsSetJmp = true;
+
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB);
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ Metrics.analyzeFunction(F);
+
+ // A function with exactly one return has it removed during the inlining
+ // process (see InlineFunction), so don't count it.
+ // FIXME: This knowledge should really be encoded outside of FunctionInfo.
+ if (Metrics.NumRets==1)
+ --Metrics.NumInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ ArgumentWeights.reserve(F->arg_size());
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
+ Metrics.CountCodeReductionForAlloca(I)));
+}
+
+/// NeverInline - Returns true if the function should never be inlined into
+/// any caller.
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
+ return (Metrics.callsSetJmp || Metrics.isRecursive ||
+ Metrics.containsIndirectBr);
+}
+
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with argument
+// specializedArgNo replaced by a constant.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ if (Callee->mayBeOverridden())
+ return 0;
+
+ int Bonus = 0;
+ // If this function uses the coldcc calling convention, prefer not to
+ // specialize it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus -= InlineConstants::ColdccPenalty;
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ unsigned ArgNo = 0;
+ unsigned i = 0;
+ for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+ I != E; ++I, ++ArgNo)
+ // Don't walk off the end of SpecializedArgNos once every specialized
+ // argument has been seen.
+ if (i != SpecializedArgNos.size() && ArgNo == SpecializedArgNos[i]) {
+ ++i;
+ Bonus += CountBonusForConstant(I);
+ }
+
+ // Calls usually take a long time, so they make the specialization gain
+ // smaller.
+ Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ return Bonus;
+}
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function. We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus, providing a limit on
+// growth. Cap the bonus at 0 - we don't want to penalize inlining just
+// because we decided not to give a bonus for devirtualizing.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+
+ // This could just be NULL.
+ if (!C) return 0;
+
+ Function *F = dyn_cast<Function>(C);
+ if (!F) return 0;
+
+ int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+ return (Bonus > 0) ? 0 : Bonus;
+}
+
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+ unsigned Bonus = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (CI->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(CI), C);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (II->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(II), C);
+ }
+ // FIXME: Eliminating conditional branches and switches should
+ // also yield a per-call performance boost.
+ else {
+ // Figure out the bonuses that will accrue due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant)
+ Bonus += CountBonusForConstant(&Inst);
+ }
+ }
+
+ return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // InlineCost - This value measures how good an inline candidate this call
+ // site is. A lower inline cost makes it more likely for the call to be
+ // inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // Compute any size reductions we can expect due to arguments being passed into
+ // the function.
+ //
+ unsigned ArgNo = 0;
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI, ++ArgNo) {
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimizations (such as scalar promotion and
+ // scalarization), so encourage the inlining of the function.
+ //
+ if (isa<AllocaInst>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ else if (isa<Constant>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ }
+
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. Measurements show that each argument costs about the same as an
+ // instruction.
+ InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Calls usually take a long time, so they make the inlining gain smaller.
+ InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+
+ return InlineCost;
+}
+
+int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ bool isDirectCall = CS.getCalledFunction() == Callee;
+ Instruction *TheCall = CS.getInstruction();
+ int Bonus = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
+ Bonus += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or the normal destination of the
+ // invoke, is an unreachable instruction, the function is effectively
+ // noreturn. As such, there is little point in inlining it.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Bonus += InlineConstants::NoreturnPenalty;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ Bonus += InlineConstants::NoreturnPenalty;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus += InlineConstants::ColdccPenalty;
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI)
+ // Compute any constant bonus due to inlining we want to give here.
+ if (isa<Constant>(I))
+ Bonus += CountBonusForConstant(FI, cast<Constant>(I));
+
+ return Bonus;
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI->NeverInline())
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI->Metrics.usesDynamicAlloca) {
+ // Get information about the caller.
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.Metrics.NumBlocks == 0) {
+ CallerFI.analyzeFunction(Caller);
+
+ // Recompute the CalleeFI pointer, getting Caller could have invalidated
+ // it.
+ CalleeFI = &CachedFunctionInfo[Callee];
+ }
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.Metrics.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // InlineCost - This value measures how good an inline candidate this call
+ // site is. A lower inline cost makes it more likely for the call to be
+ // inlined. This value may go negative because bonuses are negative
+ // numbers.
+ //
+ int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+ return llvm::InlineCost::get(InlineCost);
+}
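+
+// Illustrative sketch (assumed caller code, not part of this file): an inliner
+// holding an InlineCostAnalyzer "CA", a never-inline set "NeverInline" and a
+// cost budget "Threshold" might consume the result roughly like this:
+//
+//   InlineCost IC = CA.getInlineCost(CS, NeverInline);
+//   if (IC.isAlways()) return true;    // e.g. always_inline callees
+//   if (IC.isNever())  return false;   // e.g. recursive or setjmp callees
+//   return IC.getValue() <= Threshold; // otherwise compare cost to budget
+//
+// "CA", "NeverInline" and "Threshold" are names invented for the example, and
+// isAlways(), isNever() and getValue() are assumed InlineCost accessors; only
+// the factory functions get(), getAlways() and getNever() appear in this file.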
+
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ // Don't specialize functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden())
+ return llvm::InlineCost::getNever();
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ int Cost = 0;
+
+ // Look at the original size of the callee. Each instruction counts as 5.
+ Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+
+ // Offset that with the amount of code that can be constant-folded
+ // away with the given arguments replaced by constants.
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+ ae = SpecializedArgNos.end(); an != ae; ++an)
+ Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+
+ return llvm::InlineCost::get(Cost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee.
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.Metrics.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.Metrics.NumBlocks == 1)
+ Factor += 0.5f;
+
+ // Be more aggressive if the function contains a good chunk of vector
+ // instructions (if they make up at least 10% of the instructions).
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
+
+/// growCachedCostInfo - update the cached cost info for Caller after Callee has
+/// been inlined.
+void
+InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
+ CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+
+ // For small functions we prefer to recalculate the cost for better accuracy.
+ if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // For large functions, we can save a lot of computation time by skipping
+ // recalculations.
+ if (CallerMetrics.NumCalls > 0)
+ --CallerMetrics.NumCalls;
+
+ if (Callee == 0) return;
+
+ CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+
+ // If we don't have metrics for the callee, don't recalculate them just to
+ // update an approximation in the caller. Instead, just recalculate the
+ // caller info from scratch.
+ if (CalleeMetrics.NumBlocks == 0) {
+ resetCachedCostInfo(Caller);
+ return;
+ }
+
+ // Since CalleeMetrics were already calculated, we know that the CallerMetrics
+ // reference isn't invalidated: both were in the DenseMap.
+ CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+
+ // FIXME: If any of these three are true for the callee, the callee was
+ // not inlined into the caller, so I think they're redundant here.
+ CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+ CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+ CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
+ CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
+ CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
+ CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
+ CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
+ CallerMetrics.NumRets += CalleeMetrics.NumRets;
+
+ // analyzeBasicBlock counts each function argument as an inst.
+ if (CallerMetrics.NumInsts >= Callee->arg_size())
+ CallerMetrics.NumInsts -= Callee->arg_size();
+ else
+ CallerMetrics.NumInsts = 0;
+
+ // We are not updating the argument weights. We have already determined that
+ // Caller is a fairly large function, so we accept the loss of precision.
+}
+
+/// clear - empty the cache of inline costs
+void InlineCostAnalyzer::clear() {
+ CachedFunctionInfo.clear();
+}
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
new file mode 100644
index 000000000000..3b385d26ba3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -0,0 +1,87 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
+
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(0);
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
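+
+// Illustrative usage (assumed, not part of this file): since the counters are
+// LLVM Statistics, they are normally inspected by running the pass with
+// statistics printing enabled, e.g.
+//
+//   opt -instcount -stats -disable-output input.bc
+//
+// which dumps the Num<Opcode>Inst counters and the totals above to stderr on
+// exit. The file name "input.bc" is just a placeholder.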
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 000000000000..8709f6bf9d26
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,2526 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified: this is usually true, and assuming it simplifies the logic (if
+// they have not been simplified, the results are still correct, just possibly
+// suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Operator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT)
+ return DT->dominates(I, P);
+
+ // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract. For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExtract, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+ !Op1 || Op1->getOpcode() != OpcodeToExtract)
+ return 0;
+
+ // The expression has the form "(A op' B) op (C op' D)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+ // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+ Value *DD = A == C ? D : C;
+ // Form "A op' (B op DD)" if it simplifies completely.
+ // Does "B op DD" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ // It does! Return "A op' V" if it simplifies or is already available.
+ // If V equals B then "A op' V" is just the LHS. If V equals DD then
+ // "A op' V" is just the RHS.
+ if (V == B || V == DD) {
+ ++NumFactor;
+ return V == B ? LHS : RHS;
+ }
+ // Otherwise return "A op' V" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+ Value *CC = B == D ? C : D;
+ // Form "(A op CC) op' B" if it simplifies completely.
+ // Does "A op CC" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ // It does! Return "V op' B" if it simplifies or is already available.
+ // If V equals A then "V op' B" is just the LHS. If V equals CC then
+ // "V op' B" is just the RHS.
+ if (V == A || V == CC) {
+ ++NumFactor;
+ return V == A ? LHS : RHS;
+ }
+ // Otherwise return "V op' B" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
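+
+// Concrete example (illustrative, not from the original source): given
+// LHS = "A | B", RHS = "B | A", Opcode = And and OpcodeToExtract = Or, the
+// commutative "A == D" case above matches with DD = B; "B & B" simplifies to
+// B, which equals operand B, so the routine returns the LHS and
+// "(A | B) & (B | A)" folds to "A | B".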
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return 0;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
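+
+// Concrete example (illustrative, not from the original source): given
+// LHS = "A & B", RHS = "A" and Opcode = And, the first commutative transform
+// above ("(A op B) op C" ==> "(C op A) op B") tries "C op A", i.e. "A & A",
+// which simplifies to A; since that equals Op0's first operand, the routine
+// returns the LHS, so "(A & B) & A" folds to "A & B" without creating any
+// new instructions.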
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! Does "cmp FV, RHS" simplify?
+ if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! If they simplified to the same value, then use it as the
+ // result of the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+ Value *Cond = SI->getCondition();
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ TD, DT, MaxRecurse))
+ return V;
+ }
+ }
+
+ return 0;
+}
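+
+// Concrete example (illustrative, not from the original source): for
+// "icmp ult (select %c, i32 1, i32 2), 4" both branch comparisons fold to
+// constants ("icmp ult 1, 4" and "icmp ult 2, 4" are both true), so
+// TCmp == FCmp and the whole comparison simplifies to true regardless of %c.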
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If
+/// so, returns the common value; otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, DT))
+ return 0;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so, returns the
+/// common result; otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+
+ // Evaluate the comparison on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = 0;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
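+
+// Illustrative sketch (assumed caller code, not part of this file): a pass
+// with optional TargetData "TD" and DominatorTree "DT" pointers could try to
+// fold an add instruction "I" in place along these lines:
+//
+//   if (Value *V = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+//                                  /*isNSW=*/false, /*isNUW=*/false, TD, DT)) {
+//     I->replaceAllUsesWith(V);   // the simplified value replaces the add
+//     I->eraseFromParent();       // and the now-dead add is removed
+//   }
+//
+// A null return means no simplification was found, in which case the
+// instruction is left untouched; "TD", "DT" and "I" are invented names.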
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X*2) - X -> X
+ // (X<<1) - X -> X
+ Value *X = 0;
+ if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+ match(Op0, m_Shl(m_Specific(Op1), m_One())))
+ return Op1;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *Y = 0, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // Mul distributes over Sub. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
+ Value *X = 0, *Y = 0;
+ if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+ (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+ BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+ if (Div->isExact())
+ return X;
+ }
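+ // Illustrative IR: "%q = sdiv exact i32 %a, %b" followed by
+ // "%r = mul i32 %q, %b" lets %r simplify back to %a, since the exact flag
+ // guarantees the division left no remainder.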
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
+ Value *X = 0, *Y = 0;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
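+ // Illustrative IR: "%m = mul nsw i32 %a, %b" followed by
+ // "%q = sdiv i32 %m, %b" simplifies %q to %a, because nsw rules out the
+ // signed overflow that would otherwise make the fold unsound.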
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef / X -> undef (the undef could be a signaling NaN).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef % X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 % X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X % 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op0->getType());
+
+ // X % 1 -> 0
+ if (match(Op1, m_One()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be remainder by zero, hence it must be remainder by one.
+ return Constant::getNullValue(Op0->getType());
+
+ // X % X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef % X -> undef (the undef could be a signaling NaN).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for a Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+ if (CI->getValue().getLimitedValue() >=
+ Op0->getType()->getScalarSizeInBits())
+ return UndefValue::get(Op0->getType());
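+ // Illustrative IR: "shl i32 %x, 33" shifts by at least the 32-bit width,
+ // so its result is undefined and undef is a legal simplification.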
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifyShlInst - Given operands for a Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
+ Value *X;
+ if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+ cast<PossiblyExactOperator>(Op0)->isExact())
+ return X;
+ return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef >>l X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // undef >>a X -> all ones
+ if (match(Op0, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ // A & ~A = ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
+ // A | ~A = ~A | A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ Value *A = 0, *B = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // ~(A & ?) | A = -1
+ if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op1 || B == Op1))
+ return Constant::getAllOnesValue(Op1->getType());
+
+ // A | ~(A & ?) = -1
+ if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op0 || B == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static const Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
+/// otherwise return null. Helper function for analyzing max/min idioms.
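+/// Illustrative example: if V is "select (icmp sgt %a, %b), %a, %b" (an smax
+/// idiom), then a query for "%a sgt %b", or for the swapped "%b slt %a",
+/// returns that select's icmp condition.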
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI)
+ return 0;
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+ if (!Cmp)
+ return 0;
+ Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+ if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+ return Cmp;
+ if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+ LHS == CmpRHS && RHS == CmpLHS)
+ return Cmp;
+ return 0;
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ const Type *ITy = GetCompareTy(LHS); // The return type.
+ const Type *OpTy = LHS->getType(); // The operand type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+ cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X (for i1, the constant 1 is the all-ones value, i.e. -1)
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ }
+ }
+
+ // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+ // different addresses, and what's more the address of a stack variable is
+ // never null or equal to the address of a global. Note that generalizing
+ // to the case where LHS is a global variable address or null is pointless,
+ // since if both LHS and RHS are constants then we already constant folded
+ // the compare, and if only one of them is then we moved it to RHS already.
+ if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ // We already know that LHS != RHS.
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ case ICmpInst::ICMP_UGE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return ConstantInt::getAllOnesValue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, TD))
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ if (LHSKnownNonNegative)
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return Constant::getNullValue(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return Constant::getNullValue(ITy);
+ if (LHSKnownNonNegative)
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return Constant::getNullValue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getAllOnesValue(ITy);
+ break;
+ }
+ }
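+ // Illustrative behaviour: whenever isKnownNonZero proves the LHS cannot be
+ // zero, the equality-style compares against zero fold outright, e.g.
+ // "icmp eq i32 %x, 0" becomes false and "icmp ugt i32 %x, 0" becomes true.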
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+ ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(CI->getContext());
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(CI->getContext());
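+ // Illustrative IR: "icmp ult i32 %x, 0" has an empty satisfying range and
+ // folds to false above, while "icmp uge i32 %x, 0" covers every value and
+ // folds to true.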
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ uint32_t Width = CI->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ ConstantInt *CI2;
+ if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'urem x, CI2' produces [0, CI2).
+ Upper = CI2->getValue();
+ } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'srem x, CI2' produces (-|CI2|, |CI2|).
+ Upper = CI2->getValue().abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (!CI2->isZero())
+ Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ APInt Val = CI2->getValue().abs();
+ if (!Val.isMinValue()) {
+ Lower = IntMin.sdiv(Val);
+ Upper = IntMax.sdiv(Val) + 1;
+ }
+ } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (CI2->getValue().ult(Width))
+ Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (CI2->getValue().ult(Width)) {
+ Lower = IntMin.ashr(CI2->getValue());
+ Upper = IntMax.ashr(CI2->getValue()) + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+ // 'or x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+ // 'and x, CI2' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ }
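+ // Illustrative IR: for "%x = urem i32 %a, 8" the range computed above is
+ // [0, 8), so the containment check below folds "icmp ult i32 %x, 16" to
+ // true and "icmp ugt i32 %x, 8" to false.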
+ if (Lower != Upper) {
+ ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ const Type *SrcTy = SrcOp->getType();
+ const Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+ TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), TD, DT,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
+ }
+ }
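+ // Illustrative IR: with "%r = urem i32 %x, %n", a compare such as
+ // "icmp ult i32 %r, %n" is always true whenever the urem result is defined,
+ // since an unsigned remainder is strictly smaller than its divisor.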
+ if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // getAllOnesValue also works for vectors, unlike getTrue.
+ return Constant::getAllOnesValue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // getNullValue also works for vectors, unlike getFalse.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+ LBO->getOperand(1) == RBO->getOperand(1)) {
+ switch (LBO->getOpcode()) {
+ default: break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (ICmpInst::isSigned(Pred))
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case Instruction::Shl: {
+ bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+ bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && ICmpInst::isSigned(Pred))
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // Simplify comparisons involving max/min.
+ Value *A, *B;
+ CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+ CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+ // Signed variants on "max(a,b)>=a -> true".
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_SLE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_SGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_SGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_SLT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Unsigned variants on "max(a,b)>=a -> true".
+ P = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_ULE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_UGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_UGE:
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ case CmpInst::ICMP_ULT:
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
+ }
+
+ // Variants on "max(x,y) >= min(x,z)".
+ Value *C, *D;
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_SGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SLT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_SLE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_SGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_UGE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_ULT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_ULE)
+ // Always true.
+ return Constant::getAllOnesValue(ITy);
+ if (Pred == CmpInst::ICMP_UGT)
+ // Always false.
+ return Constant::getNullValue(ITy);
+ }
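+ // Illustrative case: when LHS is an smax idiom of (%x, %a) and RHS is an
+ // smin idiom of (%x, %b), "icmp sge LHS, RHS" is always true because
+ // smax(%x, %a) >= %x >= smin(%x, %b).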
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // Every value is either unordered with, or at least, negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // Every value is either unordered with, or at most, positive infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD, const DominatorTree *) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+ return CB->getZExtValue() ? TrueVal : FalseVal;
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+
+ return 0;
+}
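+// Illustrative IR: "select i1 %c, i32 %x, i32 %x" simplifies to %x, and
+// "select i1 undef, i32 7, i32 %x" simplifies to the constant arm, i.e. 7.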
+
+/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+ const TargetData *TD, const DominatorTree *) {
+ // The type of the GEP pointer operand.
+ const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
+ // getelementptr P -> P.
+ if (NumOps == 1)
+ return Ops[0];
+
+ if (isa<UndefValue>(Ops[0])) {
+ // Compute the (pointer) type returned by the GEP instruction.
+ const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+ NumOps-1);
+ const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ return UndefValue::get(GEPTy);
+ }
+
+ if (NumOps == 2) {
+ // getelementptr P, 0 -> P.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+ if (C->isZero())
+ return Ops[0];
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TD) {
+ const Type *Ty = PtrTy->getElementType();
+ if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ return Ops[0];
+ }
+ }
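+ // Illustrative IR: "getelementptr i32* %p, i64 0" simplifies to %p, as does
+ // any single-index GEP whose pointee type has zero allocated size.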
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return 0;
+
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
+ (Constant *const*)Ops+1, NumOps-1);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
+ Value *CommonValue = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return 0; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+ return CommonValue;
+}
+
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ }
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+ }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
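+///
+/// A typical (illustrative) use by a caller:
+///   if (Value *V = SimplifyInstruction(I, TD, DT))
+///     I->replaceAllUsesWith(V);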
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const DominatorTree *DT) {
+ Value *Result;
+
+ switch (I->getOpcode()) {
+ default:
+ Result = ConstantFoldInstruction(I, TD);
+ break;
+ case Instruction::Add:
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Mul:
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SRem:
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::URem:
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FRem:
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::And:
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Or:
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Xor:
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::ICmp:
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FCmp:
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Select:
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD, DT);
+ break;
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+ break;
+ }
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ break;
+ }
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
+}
+
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction. In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions. This catches
+/// things where one simplification exposes other opportunities. This only
+/// simplifies and deletes scalar operations, it does not change the CFG.
+///
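+/// A typical (illustrative) call is
+///   ReplaceAndSimplifyAllUses(From, NewVal, TD, DT);
+/// after which 'From' has been erased and any users that became foldable have
+/// been simplified as well.
+///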
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD,
+ const DominatorTree *DT) {
+ assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+  // FromHandle/ToHandle - These keep WeakVHs on the from/to values so that we
+  // can tell if either gets deleted out from under us or replaced in a
+  // recursive simplification.
+ WeakVH FromHandle(From);
+ WeakVH ToHandle(To);
+
+ while (!From->use_empty()) {
+ // Update the instruction to use the new value.
+ Use &TheUse = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(TheUse.getUser());
+ TheUse = To;
+
+ // Check to see if the instruction can be folded due to the operand
+ // replacement. For example changing (or X, Y) into (or X, -1) can replace
+ // the 'or' with -1.
+ Value *SimplifiedVal;
+ {
+ // Sanity check to make sure 'User' doesn't dangle across
+ // SimplifyInstruction.
+ AssertingVH<> UserHandle(User);
+
+ SimplifiedVal = SimplifyInstruction(User, TD, DT);
+ if (SimplifiedVal == 0) continue;
+ }
+
+ // Recursively simplify this user to the new value.
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+ From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+ To = ToHandle;
+
+ assert(ToHandle && "To value deleted by recursive simplification?");
+
+ // If the recursive simplification ended up revisiting and deleting
+ // 'From' then we're done.
+ if (From == 0)
+ return;
+ }
+
+ // If 'From' has value handles referring to it, do a real RAUW to update them.
+ From->replaceAllUsesWith(To);
+
+ From->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
new file mode 100644
index 000000000000..ca9cdcaf2464
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I)
+ if (contains(*I))
+ return true;
+ return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ OS << **I << "\n";
+}
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 000000000000..2e259b147b8b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// runOnFunction - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+  // Pass false to intervals_begin because we take ownership of its memory
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+  // Pass false to intervals_begin because we take ownership of its memory
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 000000000000..6e275978276d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1128 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <stack>
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup; it can be made a lot richer in
+/// the future.
+///
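+/// Informally, the lattice is ordered
+///   undefined  <  constant / notconstant / constantrange  <  overdefined
+/// and mergeIn() below never makes a value more precise.
+///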
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This Value has no known value yet.
+ undefined,
+
+ /// constant - This Value has a specific constant value.
+ constant,
+ /// notconstant - This Value is known to not have the specified value.
+ notconstant,
+
+ /// constantrange - The Value falls within this range.
+ constantrange,
+
+ /// overdefined - This value is not known to be constant, and we know that
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
+
+public:
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
+ return Res;
+ }
+
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val;
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Tag = overdefined;
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
+ assert(isUndefined());
+ Tag = constant;
+ Val = V;
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
+ Tag = notconstant;
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+      bool changed = Range != NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getConstant(),
+ RHS.getNotConstant())))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
+ return markOverdefined();
+ }
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
+ }
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getNotConstant(),
+ RHS.getConstant())))
+ if (Res->isOne())
+ return false;
+
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ return markOverdefined();
+ }
+
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
+ return markOverdefined();
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
+ }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+ LLVM_ATTRIBUTE_USED;
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LVIValueHandle - A callback value handle that updates the cache when
+  /// values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value *V) {
+ deleted();
+ }
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LVIValueHandle> {
+ typedef DenseMapInfo<Value*> PointerInfo;
+ static inline LVIValueHandle getEmptyKey() {
+ return LVIValueHandle(PointerInfo::getEmptyKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static inline LVIValueHandle getTombstoneKey() {
+ return LVIValueHandle(PointerInfo::getTombstoneKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static unsigned getHashValue(const LVIValueHandle &Val) {
+ return PointerInfo::getHashValue(Val);
+ }
+ static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ template<>
+ struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+ typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+ typedef DenseMapInfo<Value*> BPointerInfo;
+ static inline PairTy getEmptyKey() {
+ return std::make_pair(APointerInfo::getEmptyKey(),
+ BPointerInfo::getEmptyKey());
+ }
+ static inline PairTy getTombstoneKey() {
+ return std::make_pair(APointerInfo::getTombstoneKey(),
+ BPointerInfo::getTombstoneKey());
+ }
+    static unsigned getHashValue(const PairTy &Val) {
+ return APointerInfo::getHashValue(Val.first) ^
+ BPointerInfo::getHashValue(Val.second);
+ }
+ static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+ return APointerInfo::isEqual(LHS.first, RHS.first) &&
+ BPointerInfo::isEqual(LHS.second, RHS.second);
+ }
+ };
+}
+
+namespace {
+  /// LazyValueInfoCache - This is the cache kept by LazyValueInfo; it
+  /// maintains information across the clients' queries.
+ class LazyValueInfoCache {
+ /// ValueCacheEntryTy - This is all of the cached block information for
+ /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// entries, allowing us to do a lookup with a binary search.
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+ DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+ /// BlockValueStack - This stack holds the state of the value solver
+ /// during a query. It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ friend struct LVIValueHandle;
+
+ /// OverDefinedCacheUpdater - A helper object that ensures that the
+ /// OverDefinedCache is updated whenever solveBlockValue returns.
+ struct OverDefinedCacheUpdater {
+ LazyValueInfoCache *Parent;
+ Value *Val;
+ BasicBlock *BB;
+ LVILatticeVal &BBLV;
+
+ OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+ LazyValueInfoCache *P)
+ : Parent(P), Val(V), BB(B), BBLV(LV) { }
+
+ bool markResult(bool changed) {
+ if (changed && BBLV.isOverdefined())
+ Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+ return changed;
+ }
+ };
+
+
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+
+ void solve();
+
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
+
+ public:
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+ };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ++I) {
+ if (I->second == getValPtr())
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ Parent->OverDefinedCache.erase(*I);
+
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == BB)
+ ToErase.push_back(*I);
+ }
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ OverDefinedCache.erase(*I);
+
+ for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ if (solveBlockValue(e.second, e.first))
+ BlockValueStack.pop();
+ }
+}
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ LVIValueHandle ValHandle(Val, this);
+ if (!ValueCache.count(ValHandle)) return false;
+ return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ return lookup(Val)[BB];
+}
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ ValueCacheEntryTy &Cache = lookup(Val);
+ LVILatticeVal &BBLV = Cache[BB];
+
+ // OverDefinedCacheUpdater is a helper object that will update
+ // the OverDefinedCache for us when this method exits. Make sure to
+  // call markResult on it as we exit, passing a bool to indicate whether the
+  // cache needs updating, i.e. whether we have solved a new value or not.
+ OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+
+ // Since we're reusing a cached value here, we don't need to update the
+    // OverDefinedCache. The cache will have been properly updated
+ // whenever the cached value was inserted.
+ ODCacheUpdater.markResult(false);
+ return true;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+ BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
+ LVILatticeVal Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
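+/// InstructionDereferencesPointer - Return true if I is known to dereference
+/// memory through the same underlying object as Ptr (via a load, a store, or
+/// a non-volatile memory intrinsic).  The caller uses this to conclude that
+/// Ptr cannot be null in blocks where such an instruction executes.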
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+
+ // FIXME: check whether it has a valuerange that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getDestAddressSpace() == 0)
+ if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ if (MTI->getSourceAddressSpace() == 0)
+ if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr)
+ return true;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isa<AllocaInst>(Val)) {
+ NotNull = true;
+ } else {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+ if (InstructionDereferencesPointer(BI, Val)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
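+// solveBlockValueConstantRange - Evaluate an integer binary operator or cast
+// by applying the matching ConstantRange operation to the range of its first
+// operand (and, for binary operators, its constant-int RHS).  Returns false
+// if the operand's block value still needs to be computed.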
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
+ // Figure out the range of the LHS. If that fails, bail.
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+ return false;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
+ }
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+ // more definitions.
+ LVILatticeVal Result;
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ BBLV = Result;
+ return true;
+}
+
+/// getEdgeValue - Compute the value of Val on the edge from BBFrom to BBTo,
+/// placing it in Result.  Returns false if the value cannot be computed yet
+/// because block values it depends on have not been solved; callers should
+/// then call solve() and retry.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+    // If this is a conditional branch and only one successor goes to BBTo,
+    // then we may be able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality()) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ // Figure out the possible values of the query BEFORE this branch.
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ if (!InBlock.isConstantRange()) {
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+
+ // Find all potential values that satisfy both the input and output
+ // conditions.
+ ConstantRange PossibleValues =
+ TrueValues.intersectWith(InBlock.getConstantRange());
+
+ Result = LVILatticeVal::getRange(PossibleValues);
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ if (SI->getCondition() == Val) {
+ // We don't know anything in the default case.
+ if (SI->getDefaultDest() == BBTo) {
+ Result.markOverdefined();
+ return true;
+ }
+
+ // We only know something if there is exactly one value that goes from
+ // BBFrom to BBTo.
+ unsigned NumEdges = 0;
+ ConstantInt *EdgeVal = 0;
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ if (SI->getSuccessor(i) != BBTo) continue;
+ if (NumEdges++) break;
+ EdgeVal = SI->getCaseValue(i);
+ }
+ assert(EdgeVal && "Missing successor?");
+ if (NumEdges == 1) {
+ Result = LVILatticeVal::get(EdgeVal);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise see if the value is known in the block.
+ if (hasBlockValue(Val, BBFrom)) {
+ Result = getBlockValue(Val, BBFrom);
+ return true;
+ }
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ BlockValueStack.push(std::make_pair(BB, V));
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be possible
+ // to solve now. We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+  // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ DenseSet<OverDefinedPairTy>::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+      // If we removed anything, then we potentially need to update the
+      // block's successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
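+/// getConstant - Determine whether the specified value is known to be a
+/// constant at the end of the specified block.  Return null if not.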
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
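+///
+/// For example (illustrative), if V is known to lie in the range [0, 10) on
+/// the edge and C is the constant 42, an ICMP_EQ query yields False and an
+/// ICMP_NE query yields True.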
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return True;
+ if (TrueValues.inverse().contains(CR))
+ return False;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 000000000000..efb722bb97c4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,137 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+ return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+ delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
+/// vs the specified pointer/size.
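+///
+/// Illustrative reading of the two table kinds handled below: with a 'DoesNot'
+/// table, a definite match of Loc against a listed location lets us clear the
+/// corresponding mod/ref bits; with a 'DoesOnly' table, proving that Loc
+/// matches none of the listed locations lets us return NoModRef.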
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+ ImmutableCallSite CS,
+ const Location &Loc) {
+ // If we have a function, check to see what kind of mod/ref effects it
+ // has. Start by including any info globally known about the function.
+ AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+ if (MRInfo == NoModRef) return MRInfo;
+
+ // If that didn't tell us that the function is 'readnone', check to see
+  // if we have detailed info and if 'Loc' is any of the locations we know
+ // about.
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+ if (Details == 0)
+ return MRInfo;
+
+ // If the details array is of the 'DoesNot' kind, we only know something if
+ // the pointer is a match for one of the locations in 'Details'. If we find a
+ // match, we can prove some interactions cannot happen.
+ //
+ if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+ // Find out if the pointer refers to a known location.
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res != LibCallLocationInfo::Yes) continue;
+
+      // If we find a match against a location that we 'do not' interact with,
+      // merge that information into MRInfo and return it.
+ return ModRefResult(MRInfo & ~Details[i].MRInfo);
+ }
+ return MRInfo;
+ }
+
+ // If the details are of the 'DoesOnly' sort, we know something if the pointer
+ // is a match for one of the locations in 'Details'. Also, if we can prove
+  // that the pointer is *not* one of the locations in 'Details', we know that
+ // the call is NoModRef.
+ assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+ // Find out if the pointer refers to a known location.
+ bool NoneMatch = true;
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &LocInfo =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+ if (Res == LibCallLocationInfo::No) continue;
+
+ // If we don't know if this pointer points to the location, then we have to
+ // assume it might alias in some case.
+ if (Res == LibCallLocationInfo::Unknown) {
+ NoneMatch = false;
+ continue;
+ }
+
+ // If we know that this pointer definitely is pointing into the location,
+ // merge in this information.
+ return ModRefResult(MRInfo & Details[i].MRInfo);
+ }
+
+ // If we found that the pointer is guaranteed to not match any of the
+ // locations in our 'DoesOnly' rule, then we know that the pointer must point
+ // to some other location. Since the libcall doesn't mod/ref any other
+ // locations, return NoModRef.
+ if (NoneMatch)
+ return NoModRef;
+
+ // Otherwise, return any other info gained so far.
+ return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ ModRefResult MRInfo = ModRef;
+
+ // If this is a direct call to a function that LCI knows about, get the
+ // information about the runtime function.
+ if (LCI) {
+ if (const Function *F = CS.getCalledFunction()) {
+ if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
+ if (MRInfo == NoModRef) return NoModRef;
+ }
+ }
+ }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 000000000000..81b0f46f3740
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+/// getMap - The Impl pointer in LibCallInfo is actually a StringMap; this
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+ return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+ delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+ // Get location info on the first call.
+ if (NumLocations == 0)
+ NumLocations = getLocationInfo(Locations);
+
+ assert(LocID < NumLocations && "Invalid location ID!");
+ return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
+ StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+ /// If this is the first time we are querying for this info, lazily construct
+ /// the StringMap to index it.
+ if (Map == 0) {
+ Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+ const LibCallFunctionInfo *Array = getFunctionInfoArray();
+ if (Array == 0) return 0;
+
+ // We now have the array of entries. Populate the StringMap.
+ for (unsigned i = 0; Array[i].Name; ++i)
+ (*Map)[Array[i].Name] = Array+i;
+ }
+
+ // Look up this function in the string map.
+ return Map->lookup(F->getName());
+}
+
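+// A minimal sketch of the same lazy-initialization idiom used in
+// getFunctionInfo above, using only the StringMap operations already shown
+// in this file (the names and table contents below are made up for
+// illustration):
+//
+//   static StringMap<int> *LazyTable = 0;
+//
+//   static int lookupExample(StringRef Name) {
+//     if (LazyTable == 0) {
+//       LazyTable = new StringMap<int>();
+//       (*LazyTable)["memcpy"] = 1;
+//       (*LazyTable)["memset"] = 2;
+//     }
+//     return LazyTable->lookup(Name);  // 0 when Name is absent
+//   }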
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
new file mode 100644
index 000000000000..89755da85097
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -0,0 +1,655 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, for two reasons. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static unsigned Read = 1;
+ static unsigned Write = 2;
+ static unsigned Callee = 4;
+ static unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const;
+
+ public:
+ Module *Mod;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ TargetData *TD;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const {}
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ }
+ };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::run - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ Messages.clear();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+ // This isn't undefined behavior, it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert1(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
+ Assert1(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ const FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+ Assert1(FT->isVarArg() ?
+ FT->getNumParams() <= NumActualArgs :
+ FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count", &I);
+
+ Assert1(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type", &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = PI++;
+ Assert1(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type", &I);
+
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+ Assert1(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ const Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+ TD ? TD->getABITypeAlignment(Ty) : 0,
+ Ty, MemRef::Read | MemRef::Write);
+ }
+ }
+ }
+ }
+
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca", &I);
+ }
+
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
+ MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ uint64_t Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
+ MemRef::Read);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+ MSI->getAlignment(), 0,
+ MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert1(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert1(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute",
+ &I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Unusual: Returning alloca value", &I);
+ }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, uint64_t Size, unsigned Align,
+ const Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+ Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert1(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert1(!GV->isConstant(),
+ "Undefined behavior: Write to read-only memory", &I);
+ Assert1(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert1(!isa<Function>(UnderlyingObject),
+ "Unusual: Load from function body", &I);
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert1(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert1(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ if (TD) {
+ if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+ if (Align != 0) {
+ unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
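+      // For example, with Align == 8 the low Log2_32(8) == 3 bits of the
+      // address must be zero; if any of those bits is known to be one, the
+      // reference cannot be 8-byte aligned and the check below fires.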
+ Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+ }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+ !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, TargetData *TD) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V)) return true;
+
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth),
+ KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert1(!isZero(I.getOperand(1), TD),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+ // This isn't undefined behavior, it's just an obvious pessimization.
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
+ Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+ // This isn't undefined behavior, it's merely suspicious.
+ Assert1(&I == I.getParent()->begin() ||
+ prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects", &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V))
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L;
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB)) break;
+ if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, 6, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->getIndices()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(),
+ CE->getType(),
+ TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+ PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+}
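+
+// Minimal usage sketch (the wrapper name below is illustrative, not part of
+// this file): callers that already hold a Function can either run the pass
+// through a pass manager via createLintPass() or call the helper directly.
+//
+//   void lintIfDefined(Function *F) {
+//     if (!F->isDeclaration())
+//       lintFunction(*F);  // warnings go to stderr, as described above
+//   }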
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 000000000000..c5c676b5265f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,236 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to GetUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+ uint64_t &ByteOffset,
+ unsigned MaxLookup = 6) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+ ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
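+
+// For example, given a global @g of type [4 x i32] and the address
+//   getelementptr [4 x i32]* @g, i32 0, i32 2
+// this returns @g and advances ByteOffset by 8 (assuming 4-byte i32, per
+// TargetData::getIndexedOffset).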
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align, const TargetData *TD) {
+ uint64_t ByteOffset = 0;
+ Value *Base = V;
+ if (TD)
+ Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+ const Type *BaseType = 0;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // An alloca is safe to load from as long as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+ // Global variables are safe to load from but their size cannot be
+ // guaranteed if they are overridden.
+ if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ if (BaseType && BaseType->isSized()) {
+ if (TD && BaseAlign == 0)
+ BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ if (!TD)
+ return true; // Loading directly from an alloca or global is OK.
+
+ // Check if the load is within the bounds of the underlying object.
+ const PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+ if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ (Align == 0 || (ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+  // Otherwise, be a little bit aggressive: scan the block containing ScanFrom
+  // to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+ }
+ }
+ return false;
+}
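+
+// Usage sketch (illustrative): a transform deciding whether to speculate a
+// load LI ahead of a branch might ask
+//   isSafeToLoadUnconditionally(LI->getPointerOperand(), LI,
+//                               LI->getAlignment(), TD)
+// (TD may be null) and only hoist the load when this returns true.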
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left at an earlier
+/// point in the block if the scan stopped at an instruction that may modify
+/// *Ptr; null is returned in that case as well.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+  // If we're using alias analysis to disambiguate, get the size of *Ptr.
+ uint64_t AccessSize = 0;
+ if (AA) {
+ const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTypeStoreSize(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+    // Restore ScanFrom to its expected value in case the next test succeeds.
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ return LI;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ return SI->getOperand(0);
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+      // Otherwise the store may or may not alias the pointer; bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+ // Got to the start of the block, we didn't find it, but are done for this
+ // block.
+ return 0;
+}
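+
+// Usage sketch, mirroring the call made from Lint.cpp earlier in this patch:
+// starting just before a load L, scan at most six instructions backwards in
+// its block for a value that is already available there (AA may be null).
+//
+//   BasicBlock::iterator BBI = L;
+//   if (Value *V = FindAvailableLoadedValue(L->getPointerOperand(),
+//                                           L->getParent(), BBI, 6, AA))
+//     ...  // reuse V instead of re-loading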
diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 000000000000..c1afe8fbd618
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,358 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the (beginning) of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+// TODO: document lingo (pair, subscript, index)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumAnswered, "Number of dependence queries answered");
+STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent, "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown, "Number of pairs with unknown accesses");
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+ return new LoopDependenceAnalysis();
+}
+
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+static inline bool IsMemRefInstr(const Value *V) {
+ const Instruction *I = dyn_cast<const Instruction>(V);
+ return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
+}
+
+static void GetMemRefInstrs(const Loop *L,
+ SmallVectorImpl<Instruction*> &Memrefs) {
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be; ++b)
+ for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
+ i != ie; ++i)
+ if (IsMemRefInstr(i))
+ Memrefs.push_back(i);
+}
+
+static bool IsLoadOrStoreInst(Value *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+static Value *GetPointerOperand(Value *I) {
+ if (LoadInst *i = dyn_cast<LoadInst>(I))
+ return i->getPointerOperand();
+ if (StoreInst *i = dyn_cast<StoreInst>(I))
+ return i->getPointerOperand();
+  llvm_unreachable("Value is not a load or store instruction!");
+ // Never reached.
+ return 0;
+}
+
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+ const Value *A,
+ const Value *B) {
+ const Value *aObj = GetUnderlyingObject(A);
+ const Value *bObj = GetUnderlyingObject(B);
+ return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+ bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+ return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
+//===----------------------------------------------------------------------===//
+// Dependence Testing
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+ const Value *B) const {
+ return IsMemRefInstr(A) &&
+ IsMemRefInstr(B) &&
+ (cast<const Instruction>(A)->mayWriteToMemory() ||
+ cast<const Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+ Value *B,
+ DependencePair *&P) {
+ void *insertPos = 0;
+ FoldingSetNodeID id;
+ id.AddPointer(A);
+ id.AddPointer(B);
+
+ P = Pairs.FindNodeOrInsertPos(id, insertPos);
+ if (P) return true;
+
+ P = new (PairAllocator) DependencePair(id, A, B);
+ Pairs.InsertNode(P, insertPos);
+ return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+ DenseSet<const Loop*>* Loops) const {
+ // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+ for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+ if (!SE->isLoopInvariant(S, L))
+ Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+ DenseSet<const Loop*> loops;
+ getLoops(S, &loops);
+ return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+ const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+ return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+ return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+ DenseSet<const Loop*> loops;
+ getLoops(A, &loops);
+ getLoops(B, &loops);
+ return loops.size() == 1;
+}
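+
+// Classification example: for accesses A[5] and A[7] both subscript SCEVs are
+// loop invariant, so the pair is ZIV; for A[i] and A[i+2], where i is the
+// induction variable of the single enclosing loop, exactly one loop appears
+// in the subscripts and the pair is SIV; with two or more loops involved the
+// pair is handled as MIV.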
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+ return A == B ? Dependent : Independent;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+ const SCEV *B,
+ Subscript *S) const {
+ DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n");
+
+ if (A == B) {
+ DEBUG(dbgs() << " -> [D] same SCEV\n");
+ return Dependent;
+ }
+
+ if (!isAffine(A) || !isAffine(B)) {
+ DEBUG(dbgs() << " -> [?] not affine\n");
+ return Unknown;
+ }
+
+ if (isZIVPair(A, B))
+ return analyseZIV(A, B, S);
+
+ if (isSIVPair(A, B))
+ return analyseSIV(A, B, S);
+
+ return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+ DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+ // We only analyse loads and stores but no possible memory accesses by e.g.
+ // free, call, or invoke instructions.
+ if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+ DEBUG(dbgs() << "--> [?] no load/store\n");
+ return Unknown;
+ }
+
+ Value *aPtr = GetPointerOperand(P->A);
+ Value *bPtr = GetPointerOperand(P->B);
+
+ switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+ DEBUG(dbgs() << "---> [?] may alias\n");
+ return Unknown;
+
+ case AliasAnalysis::NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "---> [I] no alias\n");
+ return Independent;
+
+ case AliasAnalysis::MustAlias:
+ break; // The underlying objects alias, test accesses for dependence.
+ }
+
+ const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+ const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+ if (!aGEP || !bGEP)
+ return Unknown;
+
+ // FIXME: Is filtering coupled subscripts necessary?
+
+ // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+ // trailing zeroes to the smaller GEP, if needed.
+ typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+ GEPOpdPairsTy opds;
+ for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+ aEnd = aGEP->idx_end(),
+ bIdx = bGEP->idx_begin(),
+ bEnd = bGEP->idx_end();
+ aIdx != aEnd && bIdx != bEnd;
+ aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+ const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+ const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+ opds.push_back(std::make_pair(aSCEV, bSCEV));
+ }
+
+ if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets; by bailing out
+    // here, the analysis is restricted to pairs whose pointer-offset
+    // subscripts are identical.
+ //
+ // TODO: this could be relaxed by adding the size of the underlying object
+ // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+ // know that x is a [100 x i8]*, we could modify the first subscript to be
+ // (i, 200-i) instead of (i, -i).
+ return Unknown;
+ }
+
+ // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+ for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+ i != end; ++i) {
+ Subscript subscript;
+ DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+ if (result != Dependent) {
+ // We either proved independence or failed to analyse this subscript.
+ // Further subscripts will not improve the situation, so abort early.
+ return result;
+ }
+ P->Subscripts.push_back(subscript);
+ }
+ // We successfully analysed all subscripts but failed to prove independence.
+ return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+ assert(isDependencePair(A, B) && "Values form no dependence pair!");
+ ++NumAnswered;
+
+ DependencePair *p;
+ if (!findOrInsertDependencePair(A, B, p)) {
+ // The pair is not cached, so analyse it.
+ ++NumAnalysed;
+ switch (p->Result = analysePair(p)) {
+ case Dependent: ++NumDependent; break;
+ case Independent: ++NumIndependent; break;
+ case Unknown: ++NumUnknown; break;
+ }
+ }
+ return p->Result != Independent;
+}
+
+//===----------------------------------------------------------------------===//
+// LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+ this->L = L;
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+void LoopDependenceAnalysis::releaseMemory() {
+ Pairs.clear();
+ PairAllocator.Reset();
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<ScalarEvolution>();
+}
+
+static void PrintLoopInfo(raw_ostream &OS,
+ LoopDependenceAnalysis *LDA, const Loop *L) {
+ if (!L->empty()) return; // ignore non-innermost loops
+
+ SmallVector<Instruction*, 8> memrefs;
+ GetMemRefInstrs(L, memrefs);
+
+ OS << "Loop at depth " << L->getLoopDepth() << ", header block: ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ OS << "\n";
+
+ OS << " Load/store instructions: " << memrefs.size() << "\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
+
+ OS << " Pairwise dependence results:\n";
+ for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+ end = memrefs.end(); x != end; ++x)
+ for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
+ y != end; ++y)
+ if (LDA->isDependencePair(*x, *y))
+ OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
+ << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
+ << "\n";
+}
+
+void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ // TODO: doc why const_cast is safe
+ PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 000000000000..05831402f409
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,419 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return !contains(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ return true;
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+
+ // Hoist.
+ I->moveBefore(InsertPt);
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = 0, *Backedge = 0;
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == pred_end(H)) return 0; // dead loop
+ Incoming = *PI++;
+ if (PI != pred_end(H)) return 0; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return 0;
+}
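+
+// The pattern matched above corresponds to IR of the form:
+//   %i = phi i32 [ 0, %preheader ], [ %i.next, %backedge ]
+//   %i.next = add i32 %i, 1
+// where the phi %i is the canonical induction variable that gets returned.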
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+ // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+ // canonical induction variable and V is the trip count of the loop.
+ PHINode *IV = getCanonicalInductionVariable();
+ if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
+
+ bool P0InLoop = contains(IV->getIncomingBlock(0));
+ Value *Inc = IV->getIncomingValue(!P0InLoop);
+ BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getOperand(0) == Inc) {
+ if (BI->getSuccessor(0) == getHeader()) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+ return ICI->getOperand(1);
+ } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ return ICI->getOperand(1);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
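+
+// For example, a canonical loop whose backedge block ends with
+//   %i.next = add i32 %i, 1
+//   %exitcond = icmp ne i32 %i.next, %n
+//   br i1 %exitcond, label %header, label %exit
+// has trip count %n, which is what this returns.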
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. It will also return 0 if the trip count is very large
+/// (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+ Value* TripCount = this->getTripCount();
+ if (TripCount) {
+ if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+ // Guard against huge trip counts.
+ if (TripCountC->getValue().getActiveBits() <= 32) {
+ return (unsigned)TripCountC->getZExtValue();
+ }
+ }
+ }
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a multiple
+/// of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). It will also
+/// return 1 if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+ Value* TripCount = this->getTripCount();
+ // This will hold the ConstantInt result, if any
+ ConstantInt *Result = NULL;
+ if (TripCount) {
+ // See if the trip count is constant itself
+ Result = dyn_cast<ConstantInt>(TripCount);
+ // if not, see if it is a multiplication
+ if (!Result)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+ switch (BO->getOpcode()) {
+ case BinaryOperator::Mul:
+ Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+ break;
+ case BinaryOperator::Shl:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 5)
+ return 1u << CI->getZExtValue();
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Guard against huge trip counts.
+ if (Result && Result->getValue().getActiveBits() <= 32) {
+ return (unsigned)Result->getZExtValue();
+ } else {
+ return 1;
+ }
+}
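+
+// For example, if getTripCount() yields "mul i32 %m, 4" this returns 4, and
+// if it yields "shl i32 %m, 3" it returns 8; a trip count with no such
+// constant factor gives the conservative answer 1.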
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // Collect the loop blocks in a set so that membership tests are quick.
+ SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U))
+ UserBB = P->getIncomingBlock(UI);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != BB &&
+ !LoopBBs.count(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+  // Collect the loop blocks in a set so that membership tests are quick.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!LoopBBs.count(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+ // Sort the blocks vector so that we can use binary search to do quick
+ // lookups.
+ SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+ std::sort(LoopBBs.begin(), LoopBBs.end());
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+ // If the block is inside the loop then it is not an exit block.
+ if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ continue;
+
+ pred_iterator PI = pred_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // Only insert the exit block into the output ExitBlocks vector if the
+ // current basic block is this exit block's first predecessor. This
+ // ensures that the same exit block is not inserted twice into the
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more than two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from current block
+ // to one exit block.
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In case of multiple edges from current block to exit block, collect
+ // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+ // duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return 0;
+}
+
+void Loop::dump() const {
+ print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update
+ return false;
+}
+
+void LoopInfo::verifyAnalysis() const {
+ // LoopInfo is a FunctionPass, but verifying every loop in the function
+ // each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop
+ // manually during loop pass sequences.
+
+ if (!VerifyLoopInfo) return;
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+ (*I)->verifyLoopNest();
+ }
+
+ // TODO: check BBMap consistency.
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+ LI.print(OS);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 000000000000..10e3f297f9c5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,422 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintLoopPass(const std::string &B, raw_ostream &o)
+ : LoopPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) {
+ Out << Banner;
+ for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+ b != be;
+ ++b) {
+ (*b)->print(Out);
+ }
+ return false;
+ }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+ if (TheDebugProbe) return;
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
+ // before static globals, thus it will be destroyed before them.
+ static ManagedStatic<DebugInfoProbeInfo> DIP;
+ TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisLoop = false;
+ redoThisLoop = false;
+ LI = NULL;
+ CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+ if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+ // Reparent all of the blocks in this loop. Since this loop had a parent,
+ // they are now all in it.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops.
+ LI->changeLoopFor(*I, ParentLoop);
+
+ // Remove the loop from its parent loop.
+ for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+ ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+
+ // Move all subloops into the parent loop.
+ while (!L->empty())
+ ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+ } else {
+ // Reparent all of the blocks in this loop. Since this loop had no parent,
+ // they are no longer in a loop at all.
+
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ // Don't change blocks in subloops.
+ if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+ LI->removeBlock(L->getBlocks()[i]);
+ --i;
+ }
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ LI->removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!L->empty())
+ LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+ }
+
+ delete L;
+
+ // If L is the current loop then skip the rest of the passes and let
+ // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+ // and continue applying other passes on CurrentLoop.
+ if (CurrentLoop == L) {
+ skipThisLoop = true;
+ return;
+ }
+
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L) {
+ LQ.erase(I);
+ break;
+ }
+ }
+}
+
+// Insert loop into the loop nest (LoopInfo) and the loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+ assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+ // Insert into loop nest
+ if (ParentLoop)
+ ParentLoop->addChildLoop(L);
+ else
+ LI->addTopLevelLoop(L);
+
+ insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+ // Insert L into loop queue
+ if (L == CurrentLoop)
+ redoLoop(L);
+ else if (!L->getParentLoop())
+ // This is top level loop.
+ LQ.push_front(L);
+ else {
+ // Insert L after the parent loop.
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows a LoopPass to change the loop nest for the loop. This
+// utility may send LPPassManager into infinite loops, so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+ assert (CurrentLoop == L && "Can redo only CurrentLoop");
+ redoThisLoop = true;
+}
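+
+// A minimal illustrative sketch (hypothetical pass and helper, guarded out):
+// a LoopPass that restructures its current loop can ask for another round of
+// the pipeline like this; re-queuing unconditionally is what would drive the
+// manager into an infinite loop.
+#if 0
+struct RestructureLoops : public LoopPass {
+ static char ID;
+ RestructureLoops() : LoopPass(ID) {}
+ bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ bool Restructured = restructureLoop(L); // hypothetical transformation
+ if (Restructured)
+ LPM.redoLoop(L); // only valid for the loop currently being processed
+ return Restructured;
+ }
+};
+#endif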
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+
+// Recursively add a loop and all of its subloops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ bool Changed = false;
+ createDebugInfoProbe();
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate Loop Queue
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ if (LQ.empty()) // No loops, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ skipThisLoop = false;
+ redoThisLoop = false;
+
+ // Run all passes on the current Loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getName());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+ if (TheDebugProbe)
+ TheDebugProbe->initialize(P, F);
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+ TimeRegion PassTimer(getPassTimer(P));
+
+ Changed |= P->runOnLoop(CurrentLoop, *this);
+ }
+ if (TheDebugProbe)
+ TheDebugProbe->finalize(P, F);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName());
+ dumpPreservedSet(P);
+
+ if (!skipThisLoop) {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(LI));
+ CurrentLoop->verifyLoop();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisLoop ? "<deleted>" :
+ CurrentLoop->getHeader()->getName(),
+ ON_LOOP_MSG);
+
+ if (skipThisLoop)
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisLoop)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+
+ if (redoThisLoop)
+ LQ.push_back(CurrentLoop);
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintLoopPass(Banner, O);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for an LPPassManager if P
+// does not preserve the higher level analysis info used by other
+// LPPassManager passes. In such a case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in the current LPM. Use a new LPPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM;
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+ LPPM = (LPPassManager*)PMS.top();
+ else {
+ // Create new Loop Pass Manager if it does not exist.
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager(PMD->getDepth() + 1);
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = LPPM->getAsPass();
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 000000000000..2283db0bc482
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,176 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+ typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ void print(raw_ostream &OS, const Module * = 0) const;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ virtual void releaseMemory() {
+ Deps.clear();
+ F = 0;
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Local dep should be unknown, def or clobber!");
+ Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ static_cast<BasicBlock *>(0)));
+ } else if (CallSite CS = cast<Value>(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Resolved non-local call dep should be unknown, def or "
+ "clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(LI);
+ MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+ LI->getParent(), NLDI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(SI);
+ MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA.getLocation(VI);
+ MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+ } else {
+ llvm_unreachable("Unknown memory instruction!");
+ }
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local pointer dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+ const Instruction *Inst = &*I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+ I != E; ++I) {
+ const Instruction *DepInst = I->first.getPointer();
+ bool isClobber = I->first.getInt();
+ const BasicBlock *DepBB = I->second;
+
+ OS << " ";
+ if (!DepInst)
+ OS << "Unknown";
+ else if (isClobber)
+ OS << "Clobber";
+ else
+ OS << " Def";
+ if (DepBB) {
+ OS << " in block ";
+ WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+ }
+ if (DepInst) {
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ }
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 000000000000..53d430491198
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,213 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+ return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+ if (!CI)
+ return false;
+
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return false;
+ if (Callee->getName() != "malloc" &&
+ Callee->getName() != "_Znwj" && // operator new(unsigned int)
+ Callee->getName() != "_Znwm" && // operator new(unsigned long)
+ Callee->getName() != "_Znaj" && // operator new[](unsigned int)
+ Callee->getName() != "_Znam") // operator new[](unsigned long)
+ return false;
+
+ // Check malloc prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (FTy->getNumParams() != 1)
+ return false;
+ return FTy->getParamType(0)->isIntegerTy(32) ||
+ FTy->getParamType(0)->isIntegerTy(64);
+}
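+
+// For illustration (hypothetical IR, assuming a 64-bit target), the calls
+// accepted by the predicate above look like:
+// %m = call i8* @malloc(i64 %n)
+// %p = call i8* @_Znwm(i64 24) ; operator new(unsigned long)
+// Indirect calls, and calls whose single parameter is not i32 or i64, are
+// rejected.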
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+ if (!BCI)
+ return false;
+
+ return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+ BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+ return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+ : NULL;
+}
+
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false) {
+ if (!CI)
+ return NULL;
+
+ // The size of the malloc's result type must be known to determine array size.
+ const Type *T = getMallocAllocatedType(CI);
+ if (!T || !T->isSized() || !TD)
+ return NULL;
+
+ unsigned ElementSize = TD->getTypeAllocSize(T);
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
+ // return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getArgOperand(0);
+ Value *Multiple = NULL;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
+
+ return NULL;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1. Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
+ const CallInst *CI = extractMallocCall(I);
+ Value *ArraySize = computeArraySize(CI, TD);
+
+ if (ArraySize &&
+ ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ return CI;
+
+ // CI is a non-array malloc or we can't figure out that it is an array malloc.
+ return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+ assert(isMalloc(CI) && "getMallocType and not malloc call");
+
+ const PointerType *MallocType = NULL;
+ unsigned NumOfBitCastUses = 0;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; )
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ MallocType = cast<PointerType>(BCI->getDestTy());
+ NumOfBitCastUses++;
+ }
+
+ // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+ if (NumOfBitCastUses == 1)
+ return MallocType;
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (NumOfBitCastUses == 0)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return NULL;
+}
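+
+// For illustration (hypothetical IR): with exactly one bitcast use, as in
+// %m = call i8* @malloc(i64 16)
+// %p = bitcast i8* %m to i32*
+// the function above returns i32*. With no bitcast uses it returns i8* (the
+// call's own type), and with more than one bitcast use it returns NULL.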
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the malloc call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+ const PointerType *PT = getMallocType(CI);
+ return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt) {
+ assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, LookThroughSExt);
+}
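+
+// A minimal illustrative sketch (hypothetical helper, guarded out): a client
+// can combine the utilities above to recover the allocated element type and
+// the array size of an allocation site.
+#if 0
+static void describeMalloc(Value *V, const TargetData *TD) {
+ if (CallInst *CI = extractMallocCall(V)) {
+ const Type *ElemTy = getMallocAllocatedType(CI);
+ Value *NumElems = getMallocArraySize(CI, TD, /*LookThroughSExt=*/true);
+ (void)ElemTy; (void)NumElems; // either may be NULL if undeterminable
+ }
+}
+#endif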
+
+//===----------------------------------------------------------------------===//
+// free Call Utility Functions.
+//
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ return 0;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration())
+ return 0;
+
+ if (Callee->getName() != "free" &&
+ Callee->getName() != "_ZdlPv" && // operator delete(void*)
+ Callee->getName() != "_ZdaPv") // operator delete[](void*)
+ return 0;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // attribute exists.
+ const FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return 0;
+ if (FTy->getNumParams() != 1)
+ return 0;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+ return 0;
+
+ return CI;
+}
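+
+// For illustration (hypothetical IR): the prototype check above accepts
+// declarations used as
+// call void @free(i8* %p)
+// call void @_ZdlPv(i8* %p) ; operator delete(void*)
+// and rejects any callee whose type is not void(i8*).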
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 000000000000..bba4482f4da5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1469 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (PredCache == 0)
+ PredCache.reset(new PredIteratorCache());
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+ assert(Found && "Invalid reverse map!"); (void)Found;
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+ AliasAnalysis::Location &Loc,
+ AliasAnalysis *AA) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = AA->getLocation(V);
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst)) {
+ // calls to free() deallocate the entire structure
+ Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ return AliasAnalysis::Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Loc = AliasAnalysis::Location(II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ case Intrinsic::invariant_end:
+ Loc = AliasAnalysis::Location(II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ default:
+ break;
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return AliasAnalysis::ModRef;
+ if (Inst->mayReadFromMemory())
+ return AliasAnalysis::Ref;
+ return AliasAnalysis::NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ AliasAnalysis::Location Loc;
+ AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (CallSite InstCS = cast<Value>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case AliasAnalysis::NoModRef:
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
+ break;
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getUnknown();
+}
+
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase, MemLocOffset are lazily computed here the first time the
+/// base/offs of memloc is needed.
+static bool
+isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI,
+ const TargetData *TD) {
+ // If we have no target data, we can't do this.
+ if (TD == 0) return false;
+
+ // If we haven't already computed the base/offset of MemLoc, do so now.
+ if (MemLocBase == 0)
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const TargetData &TD) {
+ // We can only extend non-volatile integer loads.
+ if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase) return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs) return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (1) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs+NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+
+ return 0;
+}
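+
+// A worked example of the search above (illustrative numbers): querying one
+// byte at P+3 against "load i8* P, align 4" gives LIOffs = 0, LoadAlign = 4
+// and MemLocEnd = 4. The candidate width starts at NextPowerOf2(1) = 2
+// bytes, which only covers [0,2), so it is doubled to 4; a 4-byte load
+// covers [0,4), respects the alignment, and (assuming the target has legal
+// 16- and 32-bit integers) 4 is returned.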
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+ const Value *MemLocBase = 0;
+ int64_t MemLocOffset = 0;
+
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Debug intrinsics don't (and can't) cause dependences.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point (the right approach is to use
+ // GetPointerBaseWithConstantOffset).
+ if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
+ MemLoc))
+ return MemDepResult::getDef(II);
+ continue;
+ }
+ }
+
+ // Values depend on loads if the pointers are must aliased. This means that
+ // a load depends on another must aliased load from the same value.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+
+ if (isLoad) {
+ if (R == AliasAnalysis::NoAlias) {
+ // If this is an over-aligned integer load (for example,
+ // "load i8* %P, align 4") see if it would obviously overlap with the
+ // queried location if widened to a larger load (e.g. if the queried
+ // location is 1 byte at P+1). If so, return it as a load/load
+ // clobber result, allowing the client to decide to widen the load if
+ // it wants to.
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+ MemLocOffset, LI, TD))
+ return MemDepResult::getClobber(Inst);
+
+ continue;
+ }
+
+ // Must aliased loads are defs of each other.
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
+ // If we have a partial alias, then return this as a clobber for the
+ // client to handle.
+ if (R == AliasAnalysis::PartialAlias)
+ return MemDepResult::getClobber(Inst);
+#endif
+
+ // Loads that merely may-alias each other do not create a dependence;
+ // keep scanning.
+ continue;
+ }
+
+ // Stores don't depend on other no-aliased accesses.
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
+ // Stores depend on may/must aliased loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocaInst>(Inst) ||
+ (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+
+ if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
+ return MemDepResult::getDef(Inst);
+ continue;
+ }
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ switch (AA->getModRefInfo(Inst, MemLoc)) {
+ case AliasAnalysis::NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case AliasAnalysis::Mod:
+ return MemDepResult::getClobber(Inst);
+ case AliasAnalysis::Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getUnknown();
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getUnknown();
+ } else {
+ AliasAnalysis::Location MemLoc;
+ AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & AliasAnalysis::Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
+
+ LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+ QueryParent);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getUnknown();
+ }
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ if (Count == 0) return;
+
+ for (unsigned i = 1; i != unsigned(Count); ++i)
+ assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+ /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
+ /// the cached case, this can happen due to instructions being deleted etc. In
+ /// the uncached case, this starts out as the set of predecessors we care
+ /// about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ ++NumCacheNonLocal;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ ++NumUncacheNonLocal;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB))
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->getBB() == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->getResult().isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &*Entry;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+ ScanPos = Inst;
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+ Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+ } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getUnknown();
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the value, we need to
+ // check the predecessors of this block. Add them to our worklist.
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ }
+ }
+
+ return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+ BasicBlock *FromBB,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
+ assert(Loc.Ptr->getType()->isPointerTy() &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+}
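As a rough usage sketch (not part of this patch), a client pass holding a MemoryDependenceAnalysis reference could query the non-local dependencies of a load whose local query already came back non-local; `MD` and `LI` are assumed names for that reference and the load:

    SmallVector<NonLocalDepResult, 8> Deps;
    AliasAnalysis::Location Loc(LI->getPointerOperand());
    MD.getNonLocalPointerDependency(Loc, /*isLoad=*/true, LI->getParent(), Deps);
    for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
      const MemDepResult &Res = Deps[i].getResult();
      if (Res.isDef() || Res.isClobber())        // defining or clobbering access
        dbgs() << "dep in " << Deps[i].getBB()->getName()
               << ": " << *Res.getInst() << '\n';
    }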
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *BB,
+ NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return ExistingResult->getResult();
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getResult().getInst();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (Dep.isNonLocal() || Dep.isUnknown())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+ // One new entry; just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+ const AliasAnalysis::Location &Loc,
+ bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock*, Value*> &Visited,
+ bool SkipFirstBlock) {
+
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // tbaa tag are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.TBAATag = Loc.TBAATag;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's TBAATag is inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->TBAATag != Loc.TBAATag) {
+ if (CacheInfo->TBAATag) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->TBAATag = 0;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.TBAATag)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ // If we have a fully cached result for this query, we can just return the
+ // cached results and populate the visited set. However, we have to verify
+ // that we don't already have conflicting results for these blocks. Check
+ // to ensure that if a block in the results set is in the visited set that
+ // it was for the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ Value *Addr = Pointer.getAddr();
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->getBB(), Addr));
+ if (!I->getResult().isNonLocal())
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+ // Otherwise, this is either a new block, a block with an invalid cache
+ // pointer, or one that we're about to invalidate by putting more info into
+ // it than its valid cache info can hold. If the cache is empty, the result
+ // we compute will be complete and can serve as valid cache info; otherwise
+ // it can't.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add are only sorted on demand
+ // (we don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we have already
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal()) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ if (InsertRes.second) {
+ // First time we've looked at *PI.
+ NewBlocks.push_back(*PI);
+ continue;
+ }
+
+ // If we have seen this block before, but it was with a different
+ // pointer then we have a phi translation failure and we have to treat
+ // this as a clobber.
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
+ goto PredTranslationFailure;
+ }
+ }
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
+ continue;
+ }
+
+ // We do need to do phi translation, if we know ahead of time we can't phi
+ // translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ Cache = 0;
+
+ PredList.clear();
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr &PredPointer = PredList.back().second;
+ PredPointer.PHITranslateValue(BB, Pred, 0);
+
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
+
+ if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtrVal)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < PredList.size(); i++)
+ Visited.erase(PredList[i].first);
+
+ goto PredTranslationFailure;
+ }
+ }
+
+ // Actually process results here; this needs to be a separate loop to avoid
+ // calling getNonLocalPointerDepFromBB for blocks we don't want to return
+ // any results for. (getNonLocalPointerDepFromBB will modify our
+ // data structures in ways the code after the PredTranslationFailure label
+ // doesn't expect.)
+ for (unsigned i = 0; i < PredList.size(); i++) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (PredPtrVal == 0)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
+ // Add the entry to the Result list.
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
+ Result.push_back(Entry);
+
+ // Since we had a phi translation failure, the cache for CacheKey won't
+ // include all of the entries that we need to immediately satisfy future
+ // queries. Mark this in NonLocalPointerDeps by setting the
+ // BBSkipFirstBlockPair pointer to null. This forces future reuse of the
+ // cached value to do more work, but ensures the phi translation failure
+ // is not missed.
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
+ continue;
+ }
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
+ PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our data structures while processing the current block.
+
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we failed phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as unknown.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->getBB() != BB)
+ continue;
+
+ assert(I->getResult().isNonLocal() &&
+ "Should only be here with transparent block");
+ I->setResult(MemDepResult::getUnknown());
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+ Pointer.getAddr()));
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].getResult().getInst();
+ if (Target == 0) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].getBB());
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
+/// in more places than the cached info necessarily reflects.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!Ptr->getType()->isPointerTy()) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
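A hedged sketch of the client protocol these two hooks support; `MD`, `OldV`, `NewV`, `TI`, and `SuccNum` are assumed names rather than anything from this patch:

    // After proving OldV and NewV equivalent and rewriting uses, drop the
    // possibly over-conservative cached pointer info for the surviving value.
    OldV->replaceAllUsesWith(NewV);
    MD.invalidateCachedPointerInfo(NewV);

    // After a CFG edit such as splitting a critical edge, the cached
    // predecessor lists are stale and must be cleared.
    SplitCriticalEdge(TI, SuccNum, this);
    MD.invalidateCachedPredecessors();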
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (RemInst->getType()->isPointerTy()) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+ // Using a dirty version of the instruction after RemInst saves having to scan
+ // the entire block to get to this point.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
+ E = ReverseDeps.end(); I != E; ++I) {
+ Instruction *InstDependingOnRemInst = *I;
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+ // Make sure to remember that new things depend on NewDirtyVal's instruction.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
+ for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
+ I != E; ++I) {
+ assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[*I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ AA->deleteValue(RemInst);
+ DEBUG(verifyRemoved(RemInst));
+}
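A minimal sketch of the expected call order when a transformation deletes an instruction (with `MD` and `DeadInst` as assumed names): memdep is updated first, so the reverse maps can redirect dependents to a dirty entry for the following instruction, and only then is the IR erased.

    MD.removeInstruction(DeadInst);   // patch up local/non-local/pointer caches
    DeadInst->eraseFromParent();      // now it is safe to delete the instruction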
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I = NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+ E = I->second.end(); II != E; ++II)
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 000000000000..e7e999cebeb9
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints it in a
+// (sufficiently prepared) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true)
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+ E = Finder.compile_unit_end(); I != E; ++I) {
+ O << "Compile Unit: ";
+ DICompileUnit(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+ E = Finder.subprogram_end(); I != E; ++I) {
+ O << "Subprogram: ";
+ DISubprogram(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+ E = Finder.global_variable_end(); I != E; ++I) {
+ O << "GlobalVariable: ";
+ DIGlobalVariable(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.type_begin(),
+ E = Finder.type_end(); I != E; ++I) {
+ O << "Type: ";
+ DIType(*I).print(O);
+ O << '\n';
+ }
+}
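A hedged sketch of driving this printer programmatically; per the registration above, `opt -analyze -module-debuginfo` produces the same output from the command line. The explicit print() call stands in for what opt's -analyze driver does, and `M` is an assumed Module:

    ModulePass *P = createModuleDebugInfoPrinterPass();
    PassManager PM;
    PM.add(P);               // the pass manager takes ownership of P
    PM.run(M);               // runOnModule() collects the debug info
    P->print(outs(), &M);    // emit it, roughly what `opt -analyze` does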
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 000000000000..101c2d5b0285
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+ /// implementations, in that it does not chain to a previous analysis. As
+ /// such it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(ID) {
+ initializeNoAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ virtual void initializePass() {
+ // Note: NoAA does not call InitializeAliasAnalysis because it's
+ // special and does not support chaining.
+ TD = getAnalysisIfAvailable<TargetData>();
+ }
+
+ virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+ return MayAlias;
+ }
+
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
+ }
+
+ virtual bool pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return false;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return ModRef;
+ }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ virtual void addEscapingUse(Use &U) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
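Illustrative only: with -no-aa selected, every query collapses to its most conservative answer; `AA`, `A`, and `B` are assumed to come from the enclosing pass.

    AliasAnalysis::AliasResult R =
        AA.alias(AliasAnalysis::Location(A), AliasAnalysis::Location(B));
    assert(R == AliasAnalysis::MayAlias && "NoAA never proves independence");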
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 000000000000..70dcd0df242d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,442 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (isa<CastInst>(Inst) &&
+ Inst->isSafeToSpeculativelyExecute())
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+void PHITransAddr::dump() const {
+ if (Addr == 0) {
+ dbgs() << "PHITransAddr: null\n";
+ return;
+ }
+ dbgs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (I == 0) return true;
+
+ // If it's an instruction, it is either in InstInputs or its operands
+ // recursively are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Non phi translatable instruction found in PHITransAddr:\n";
+ errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
+ return false;
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (Addr == 0) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
+ return false;
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst == 0) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+ // Handle input instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return 0;
+
+ // All instruction operands are now inputs (and of course, they may also be
+ // defined in this block, so they may need to be phi translated themselves).
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ if (PHIIn == 0) return 0;
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ if (GEPOp == 0) return 0;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ if (LHS == 0) return 0;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
+ return BO;
+ }
+
+ return 0;
+ }
+
+ // Otherwise, we failed.
+ return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ assert(Verify() && "Invalid PHITransAddr!");
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ assert(Verify() && "Invalid PHITransAddr!");
+
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+
+ return Addr == 0;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return 0;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(),
+ OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
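A minimal sketch of using PHITransAddr the way MemoryDependenceAnalysis does above; `Ptr`, `BB`, `Pred`, `TD`, and `DT` are assumed to exist in the caller.

    PHITransAddr Addr(Ptr, TD);
    if (Addr.NeedsPHITranslationFromBlock(BB) &&
        Addr.IsPotentiallyPHITranslatable() &&
        !Addr.PHITranslateValue(BB, Pred, DT)) {   // returns true on failure
      Value *PtrInPred = Addr.getAddr();           // Ptr's value along Pred->BB
      (void)PtrInPred;
    }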
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..7c584daef734
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,522 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG, backedges are removed and replaced by phony
+// edges to obtain a DAG and, with it, unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96], edges can be numbered such that, given a path number, the
+// corresponding path is recovered by following the CFG and updating the path
+// number at each edge.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
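A small worked example of this numbering (added for illustration; the diamond CFG and the NumPaths/Val quantities are the standard Ball-Larus ones, not identifiers from this file): for entry -> {A, B} -> exit,

    NumPaths(exit)  = 1
    NumPaths(A)     = NumPaths(B) = 1
    NumPaths(entry) = NumPaths(A) + NumPaths(B) = 2
    Val(entry->A)   = 0,   Val(entry->B) = NumPaths(A) = 1,   all other edges 0

so the path entry->A->exit sums its edge increments to 0 and entry->B->exit sums to 1, giving each of the two paths a unique number in [0, NumPaths(entry)).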
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+#include <stack>
+#include <string>
+#include <utility>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination?
+static cl::opt<bool> ProcessEarlyTermination(
+ "path-profile-early-termination", cl::Hidden,
+ cl::desc("In path profiling, insert extra instrumentation to account for "
+ "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+ return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+ return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+ _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+ return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+ _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+ return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+ return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+ return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+ return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+ return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+ return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+ _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+ removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+ _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+ removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+ std::stringstream name;
+
+ if(getBlock() != NULL) {
+ if(getBlock()->hasName()) {
+ std::string tempName(getBlock()->getName());
+ name << tempName.c_str() << " (" << _uid << ")";
+ } else
+ name << "<unnamed> (" << _uid << ")";
+ } else
+ name << "<null> (" << _uid << ")";
+
+ return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+ // TODO: Avoid linear scan by using a set instead
+ for(BLEdgeIterator i = v.begin(),
+ end = v.end();
+ i != end;
+ ++i) {
+ if((*i) == e) {
+ v.erase(i);
+ break;
+ }
+ }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+ return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+ return(_target);
+}
+
+// Gets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+ return _edgeType;
+}
+
+// Sets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+ _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+ return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+ _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+ return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+ _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+ return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+ _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+ return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+ _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+ return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+ BLBlockNodeMap inDag;
+ std::stack<BallLarusNode*> dfsStack;
+
+ _root = addNode(&(_function.getEntryBlock()));
+ _exit = addNode(NULL);
+
+ // start search from root
+ dfsStack.push(getRoot());
+
+ // dfs to add each bb into the dag
+ while(dfsStack.size())
+ buildNode(inDag, dfsStack);
+
+ // put in the final edge
+ addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+ ++edge)
+ delete (*edge);
+
+ for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+ ++node)
+ delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+ BallLarusNode* node;
+ std::queue<BallLarusNode*> bfsQueue;
+ bfsQueue.push(getExit());
+
+ while(bfsQueue.size() > 0) {
+ node = bfsQueue.front();
+
+ DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+ bfsQueue.pop();
+ unsigned prevPathNumber = node->getNumberPaths();
+ calculatePathNumbersFrom(node);
+
+ // Check for DAG splitting
+ if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+ // Add new phony edge from the split-node to the DAG's exit
+ BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+ exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+ // Counters to handle the possibility of a multi-graph
+ BasicBlock* oldTarget = 0;
+ unsigned duplicateNumber = 0;
+
+ // Iterate through each successor edge, adding phony edges
+ for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+ if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+ // is this edge a duplicate?
+ if( oldTarget != (*succ)->getTarget()->getBlock() )
+ duplicateNumber = 0;
+
+ // create the new phony edge: root -> succ
+ BallLarusEdge* rootEdge =
+ addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+ rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+ rootEdge->setRealEdge(*succ);
+
+ // split on this edge and reference its exit/root phony edges
+ (*succ)->setType(BallLarusEdge::SPLITEDGE);
+ (*succ)->setPhonyRoot(rootEdge);
+ (*succ)->setPhonyExit(exitEdge);
+ (*succ)->setWeight(0);
+ }
+ }
+
+ calculatePathNumbersFrom(node);
+ }
+
+ DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+ << node->getNumberPaths() << ".\n");
+
+ if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+ DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+ for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+ pred != end; pred++) {
+ if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+ (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ BallLarusNode* nextNode = (*pred)->getSource();
+ // not yet visited?
+ if(nextNode->getNumberPaths() == 0)
+ bfsQueue.push(nextNode);
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+ return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+ return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+ return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+ return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+ for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+ (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its immediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+ BallLarusNode* currentNode = dfsStack.top();
+ BasicBlock* currentBlock = currentNode->getBlock();
+
+ if(currentNode->getColor() != BallLarusNode::WHITE) {
+ // we have already visited this node
+ dfsStack.pop();
+ currentNode->setColor(BallLarusNode::BLACK);
+ } else {
+ // are there any external procedure calls?
+ if( ProcessEarlyTermination ) {
+ for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+ bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+ bbCurrent++ ) {
+ Instruction& instr = *bbCurrent;
+ if( instr.getOpcode() == Instruction::Call ) {
+ BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+ callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+ break;
+ }
+ }
+ }
+
+ TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+ || isa<UnwindInst>(terminator))
+ addEdge(currentNode, getExit(),0);
+
+ currentNode->setColor(BallLarusNode::GRAY);
+ inDag[currentBlock] = currentNode;
+
+ BasicBlock* oldSuccessor = 0;
+ unsigned duplicateNumber = 0;
+
+ // iterate through this node's successors
+ for(succ_iterator successor = succ_begin(currentBlock),
+ succEnd = succ_end(currentBlock); successor != succEnd;
+ oldSuccessor = *successor, ++successor ) {
+ BasicBlock* succBB = *successor;
+
+ // is this edge a duplicate?
+ if (oldSuccessor == succBB)
+ duplicateNumber++;
+ else
+ duplicateNumber = 0;
+
+ buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+ }
+ }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+ dfsStack, BallLarusNode* currentNode,
+ BasicBlock* succBB, unsigned duplicateCount) {
+ BallLarusNode* succNode = inDag[succBB];
+
+ if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+ // visited node and forward edge
+ addEdge(currentNode, succNode, duplicateCount);
+ } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+ // visited node and back edge
+ DEBUG(dbgs() << "Backedge detected.\n");
+ addBackedge(currentNode, succNode, duplicateCount);
+ } else {
+ BallLarusNode* childNode;
+ // not visited node and forward edge
+ if(succNode) // an unvisited node that is child of a gray node
+ childNode = succNode;
+ else { // an unvisited node that is a child of an unvisited node
+ childNode = addNode(succBB);
+ inDag[succBB] = childNode;
+ }
+ addEdge(currentNode, childNode, duplicateCount);
+ dfsStack.push(childNode);
+ }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+ if(node == getExit())
+ // The Exit node must be the base case
+ node->setNumberPaths(1);
+ else {
+ unsigned sumPaths = 0;
+ BallLarusNode* succNode;
+
+ for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; succ++) {
+ if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+ (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ (*succ)->setWeight(sumPaths);
+ succNode = (*succ)->getTarget();
+
+ if( !succNode->getNumberPaths() )
+ return;
+ sumPaths += succNode->getNumberPaths();
+ }
+
+ node->setNumberPaths(sumPaths);
+ }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+ return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call delete on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+ BallLarusNode* newNode = createNode(BB);
+ _nodes.push_back(newNode);
+ return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+ _edges.push_back(newEdge);
+ source->addSuccEdge(newEdge);
+ target->addPredEdge(newEdge);
+ return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+ childEdge->setType(BallLarusEdge::BACKEDGE);
+
+ childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+ childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+ childEdge->getPhonyRoot()->setRealEdge(childEdge);
+ childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+ childEdge->getPhonyExit()->setRealEdge(childEdge);
+ childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+ _backEdges.push_back(childEdge);
+}
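
To make the numbering scheme above concrete, here is a minimal standalone sketch (independent of the classes in this file) that applies the same edge-increment rule as calculatePathNumbersFrom to a hand-built diamond DAG, then decodes a path number back into a node sequence using the "largest weight <= remaining value" rule that getNextEdge in PathProfileInfo.cpp below applies. The node numbering and adjacency list are invented purely for illustration.

// Standalone sketch of Ball-Larus path numbering on a tiny, hand-built DAG.
#include <cstdio>
#include <vector>

struct Edge { int target; unsigned weight; };

int main() {
  // Diamond DAG: 0 -> {1, 2}, 1 -> 3, 2 -> 3; node 3 is the exit.
  std::vector<std::vector<Edge>> succ = {
    {{1, 0}, {2, 0}},  // node 0 (root)
    {{3, 0}},          // node 1
    {{3, 0}},          // node 2
    {}                 // node 3 (exit)
  };
  std::vector<unsigned> numPaths(succ.size(), 0);

  // Visit nodes in reverse topological order (exit first). As in
  // calculatePathNumbersFrom, an edge's weight is the number of paths through
  // the successors already processed before it, and a node's path count is
  // the sum over all of its successors.
  const int order[] = {3, 2, 1, 0};
  for (int n : order) {
    if (succ[n].empty()) { numPaths[n] = 1; continue; }
    unsigned sum = 0;
    for (Edge &e : succ[n]) {
      e.weight = sum;
      sum += numPaths[e.target];
    }
    numPaths[n] = sum;
  }
  std::printf("paths from root: %u\n", numPaths[0]);  // 2 for the diamond

  // Decode path number 1 back into nodes: at each node take the edge with the
  // largest weight that is still <= the remaining value, then subtract it.
  unsigned remaining = 1;
  int node = 0;
  std::printf("path 1: %d", node);
  while (!succ[node].empty()) {
    const Edge *best = 0;
    for (const Edge &e : succ[node])
      if (e.weight <= remaining && (!best || best->weight < e.weight))
        best = &e;
    remaining -= best->weight;
    node = best->target;
    std::printf(" -> %d", node);
  }
  std::printf("\n");  // prints: path 1: 0 -> 2 -> 3
  return 0;
}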
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 000000000000..b361d3f4fa94
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+ class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+ public:
+ PathProfileLoaderPass() : ModulePass(ID) { }
+ ~PathProfileLoaderPass();
+
+ // this pass doesn't change anything (only loads information)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // the full name of the loader pass
+ virtual const char* getPassName() const {
+ return "Path Profiling Information Loader";
+ }
+
+ // required since this pass implements multiple inheritance
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ // entry point to run the pass
+ bool runOnModule(Module &M);
+
+ // pass identification
+ static char ID;
+
+ private:
+ // make a reference table to refer to functions by number
+ void buildFunctionRefs(Module &M);
+
+ // process argument info of a program from the input file
+ void handleArgumentInfo();
+
+ // process path number information from the input file
+ void handlePathInfo();
+
+ // array of references to the functions in the module
+ std::vector<Function*> _functions;
+
+ // path profile file handle
+ FILE* _file;
+
+ // path profile file name
+ std::string _filename;
+ };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+ NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+ "path-profile-loader",
+ "Load path profile information from file",
+ false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+ return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+ unsigned duplicateNumber)
+ : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+ double countStdDev, PathProfileInfo* ppi)
+ : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+ return 100 * double(_count) /
+ double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+ unsigned int pathNumber) {
+ BallLarusEdge* best = 0;
+
+ for( BLEdgeIterator next = node->succBegin(),
+ end = node->succEnd(); next != end; next++ ) {
+ if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+ (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+ (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+ (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+ best = *next;
+ }
+
+ return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+ increment -= next->getWeight();
+
+ if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+ next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getTarget() != _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getSource()->getBlock(),
+ next->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+ next->getTarget() == _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getSource() == _ppi->_currentDag->getRoot() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+ increment -= next->getWeight();
+
+ // add block to the block list if it is a real edge
+ if( next->getType() == BallLarusEdge::NORMAL)
+ pbv->push_back (currentNode->getBlock());
+ // make the back edge the last edge since we are at the end
+ else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+ pbv->push_back (currentNode->getBlock());
+ pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+ }
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+ BallLarusNode* root = _ppi->_currentDag->getRoot();
+ BallLarusEdge* edge = getNextEdge(root, _number);
+
+ if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+ return edge->getTarget()->getBlock();
+
+ return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+ if (_currentDag)
+ delete _currentDag;
+}
+
+// set the function for which paths are currently being processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+ // Make sure it exists
+ if (!F) return;
+
+ if (_currentDag)
+ delete _currentDag;
+
+ _currentFunction = F;
+ _currentDag = new BallLarusDag(*F);
+ _currentDag->init();
+ _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+ return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+ return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+ return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+ return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+ return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+ return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+ return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+ for( FunctionPathIterator funcNext = _functionPaths.begin(),
+ funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+ for( ProfilePathIterator pathNext = funcNext->second.begin(),
+ pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+ delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+ // get the filename and setup the module's function references
+ _filename = PathProfileInfoFilename;
+ buildFunctionRefs (M);
+
+ if (!(_file = fopen(_filename.c_str(), "rb"))) {
+ errs () << "error: input '" << _filename << "' file does not exist.\n";
+ return false;
+ }
+
+ ProfilingType profType;
+
+ while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+ switch (profType) {
+ case ArgumentInfo:
+ handleArgumentInfo ();
+ break;
+ case PathInfo:
+ handlePathInfo ();
+ break;
+ default:
+ errs () << "error: bad path profiling file syntax, " << profType << "\n";
+ fclose (_file);
+ return false;
+ }
+ }
+
+ fclose (_file);
+
+ return true;
+}
+
+// create a reference table for looking up functions by the numbers used in
+// the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+ _functions.push_back(0); // make the 0 index a null pointer
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+ _functions.push_back(F);
+ }
+}
+
+// handle command-line argument info in the profile output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+ // get the argument list's length
+ unsigned savedArgsLength;
+ if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+ errs() << "warning: argument info header/data mismatch\n";
+ return;
+ }
+
+ // allocate a buffer, and get the arguments
+ char* args = new char[savedArgsLength+1];
+ if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+ errs() << "warning: argument info header/data mismatch\n";
+
+ args[savedArgsLength] = '\0';
+ argList = std::string(args);
+ delete [] args; // cleanup dynamic string
+
+ // skip the padding so the next record starts on a 4-byte boundary
+ if (savedArgsLength & 3)
+ fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+ // get the number of functions in this profile
+ unsigned functionCount;
+ if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+ errs() << "warning: path info header/data mismatch\n";
+ return;
+ }
+
+ // gather path information for each function
+ for (unsigned i = 0; i < functionCount; i++) {
+ PathProfileHeader pathHeader;
+ if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+ errs() << "warning: bad header for path function info\n";
+ break;
+ }
+
+ Function* f = _functions[pathHeader.fnNumber];
+
+ // dynamically allocate a table to store path numbers
+ PathProfileTableEntry* pathTable =
+ new PathProfileTableEntry[pathHeader.numEntries];
+
+ if( fread(pathTable, sizeof(PathProfileTableEntry),
+ pathHeader.numEntries, _file) != pathHeader.numEntries) {
+ delete [] pathTable;
+ errs() << "warning: path function info header/data mismatch\n";
+ return;
+ }
+
+ // Build a new path for the current function
+ unsigned int totalPaths = 0;
+ for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter;
+ _functionPaths[f][pathTable[j].pathNumber]
+ = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+ 0, this);
+ }
+
+ _functionPathCounts[f] = totalPaths;
+
+ delete [] pathTable;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+ struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoPathProfileInfo() : ImmutablePass(ID) {
+ initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoPathProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+ "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
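
As a usage sketch (not part of this change), a client pass could consume the loaded profile roughly as follows. The pass name PrintPathProfile is invented and the registration/initialization boilerplate is omitted; the PathProfileInfo calls themselves mirror the ones PathProfileVerifier makes in the next file.

#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/PathProfileInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Hypothetical read-only client of the path profile loader.
  struct PrintPathProfile : public ModulePass {
    static char ID;
    PrintPathProfile() : ModulePass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<PathProfileInfo>();
    }

    virtual bool runOnModule(Module &M) {
      PathProfileInfo &PPI = getAnalysis<PathProfileInfo>();
      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;
        PPI.setCurrentFunction(F);
        errs() << F->getName() << ": " << PPI.pathsRun() << "/"
               << PPI.getPotentialPathCount() << " paths executed\n";
        for (ProfilePathIterator I = PPI.pathBegin(), PE = PPI.pathEnd();
             I != PE; ++I)
          errs() << "  path #" << I->second->getNumber() << " ran "
                 << I->second->getCount() << " times\n";
      }
      return false;  // read-only, nothing modified
    }
  };
}

char PrintPathProfile::ID = 0;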
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 000000000000..0ae734e259db
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from current path profile
+// information
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class PathProfileVerifier : public ModulePass {
+ private:
+ bool runOnModule(Module &M);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfileVerifier() : ModulePass(ID) {
+ initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+
+ virtual const char *getPassName() const {
+ return "Path Profiler Verifier";
+ }
+
+ // The verifier requires the path profile and edge profile.
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+ };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+ cl::init("edgefrompath.llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Edge profile file generated by -path-profile-verifier"),
+ cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+ "Compare the path profile derived edge profile against the "
+ "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+ return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<PathProfileInfo>();
+ AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to accumulate the total
+// edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+ PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+ // set up a data structure that maps path edges to indices into an
+ // array of edge counters
+ NestedBlockToIndexMap arrayMap;
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ arrayMap[0][F->begin()][0] = i++;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ unsigned duplicate = 0;
+ BasicBlock* prev = 0;
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+ prev = TI->getSuccessor(s), ++s) {
+ if (prev == TI->getSuccessor(s))
+ duplicate++;
+ else duplicate = 0;
+
+ arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+ }
+ }
+ }
+
+ std::vector<unsigned> edgeArray(i);
+
+ // iterate through each path and increment the edge counters as needed
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ pathProfileInfo.setCurrentFunction(F);
+
+ DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+ << pathProfileInfo.pathsRun()
+ << "/" << pathProfileInfo.getPotentialPathCount()
+ << " potential paths\n");
+
+ for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+ endPath = pathProfileInfo.pathEnd();
+ nextPath != endPath; nextPath++ ) {
+ ProfilePath* currentPath = nextPath->second;
+
+ ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+ DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+ << currentPath->getCount() << "\n");
+ // setup the entry edge (normally path profiling doesn't care about this)
+ if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+ edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ += currentPath->getCount();
+
+ for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+ endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+ if (nextEdge != pev->begin())
+ DEBUG(dbgs() << " :: ");
+
+ BasicBlock* source = nextEdge->getSource();
+ BasicBlock* target = nextEdge->getTarget();
+ unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+ DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+ << "}--> " << target->getNameStr());
+
+ // Ensure all the referenced edges exist
+ // TODO: make this a separate function
+ if( !arrayMap.count(source) ) {
+ errs() << " error [" << F->getNameStr() << "()]: source '"
+ << source->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source].count(target) ) {
+ errs() << " error [" << F->getNameStr() << "()]: target '"
+ << target->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+ errs() << " error [" << F->getNameStr() << "()]: edge "
+ << source->getNameStr() << " -> " << target->getNameStr()
+ << " duplicate number " << duplicateNumber
+ << " does not exist in the array map.\n";
+ } else {
+ edgeArray[arrayMap[source][target][duplicateNumber]]
+ += currentPath->getCount();
+ }
+ }
+
+ DEBUG(errs() << "\n");
+
+ delete pev;
+ }
+ }
+
+ std::string errorInfo;
+ std::string filename = EdgeProfileFilename;
+
+ // Open a handle to the file
+ FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+ if (!edgeFile) {
+ errs() << "error: unable to open file '" << filename << "' for output.\n";
+ return false;
+ }
+
+ errs() << "Generating edge profile '" << filename << "' ...\n";
+
+ // write argument info
+ unsigned type = ArgumentInfo;
+ unsigned num = pathProfileInfo.argList.size();
+ int zeros = 0;
+
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+ fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
+ if (num&3)
+ fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+ type = EdgeInfo;
+ num = edgeArray.size();
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+ // write each edge to the file
+ for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+ e = edgeArray.end(); s != e; s++)
+ fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+ fclose (edgeFile);
+
+ return true;
+}
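
For reference, both the loader above and this verifier use the same on-disk framing: a 4-byte ProfilingType tag, a 4-byte count or length, the payload, then zero padding up to the next 4-byte boundary. The standalone sketch below writes one argument record in that framing; the tag value kArgumentInfo and the output filename are stand-ins for illustration, and the real constants live in ProfileInfoTypes.h.

#include <cstdio>
#include <string>

enum { kArgumentInfo = 2 };  // stand-in tag value, not the real enum constant

// Write one record: tag, payload length, payload, zero padding to 4 bytes.
static void writeArgumentRecord(FILE *f, const std::string &args) {
  unsigned type = kArgumentInfo;
  unsigned num = (unsigned)args.size();
  int zeros = 0;
  std::fwrite(&type, sizeof(unsigned), 1, f);
  std::fwrite(&num, sizeof(unsigned), 1, f);
  std::fwrite(args.c_str(), 1, num, f);
  if (num & 3)  // pad so the next record starts on a 4-byte boundary
    std::fwrite(&zeros, 1, 4 - (num & 3), f);
}

int main() {
  FILE *f = std::fopen("example.llvmprof.out", "wb");
  if (!f) return 1;
  writeArgumentRecord(f, "a.out -O2");
  std::fclose(f);
  return 0;
}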
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
new file mode 100644
index 000000000000..6ed27297923f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,51 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+ delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 000000000000..b594e2ba5506
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,426 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+ "profile-estimator-loop-weight", cl::init(10),
+ cl::value_desc("loop-weight"),
+ cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+ class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
+ double ExecCount;
+ LoopInfo *LI;
+ std::set<BasicBlock*> BBToVisit;
+ std::map<Loop*,double> LoopExitWeights;
+ std::map<Edge,double> MinimalWeight;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit ProfileEstimatorPass(const double execcount = 0)
+ : FunctionPass(ID), ExecCount(execcount) {
+ initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
+ if (execcount == 0) ExecCount = LoopWeight;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information estimator";
+ }
+
+ /// run - Estimate the profile information from the specified file.
+ virtual bool runOnFunction(Function &F);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual void recurseBasicBlock(BasicBlock *BB);
+
+ void inline printEdgeWeight(Edge);
+ };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+
+namespace llvm {
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+ FunctionPass *createProfileEstimatorPass() {
+ return new ProfileEstimatorPass();
+ }
+
+ /// createProfileEstimatorPass - This function returns a Pass that estimates
+ /// profiling information using the given loop execution count.
+ Pass *createProfileEstimatorPass(const unsigned execcount) {
+ return new ProfileEstimatorPass(execcount);
+ }
+}
+
+static double ignoreMissing(double w) {
+ if (w == ProfileInfo::MissingValue) return 0;
+ return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+ DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+ DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+ << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+ // Break the recursion if this BasicBlock was already visited.
+ if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+ // Read the LoopInfo for this block.
+ bool BBisHeader = LI->isLoopHeader(BB);
+ Loop* BBLoop = LI->getLoopFor(BB);
+
+ // To get the block weight, read all incoming edges.
+ double BBWeight = 0;
+ std::set<BasicBlock*> ProcessedPreds;
+ for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ // If this block was not considered already, add weight.
+ Edge edge = getEdge(*bbi,BB);
+ double w = getEdgeWeight(edge);
+ if (ProcessedPreds.insert(*bbi).second) {
+ BBWeight += ignoreMissing(w);
+ }
+ // If this block is a loop header and the predecessor is contained in this
+ // loop, the edge is a backedge; continue and do not check whether the
+ // value is valid.
+ if (BBisHeader && BBLoop->contains(*bbi)) {
+ printEdgeError(edge, "but is backedge, continuing");
+ continue;
+ }
+ // If the edge's value is missing (and this is not a loop header and not a
+ // backedge), return; this block is currently not estimatable.
+ if (w == MissingValue) {
+ printEdgeError(edge, "returning");
+ return;
+ }
+ }
+ if (getExecutionCount(BB) != MissingValue) {
+ BBWeight = getExecutionCount(BB);
+ }
+
+ // Fetch all necessary information for current block.
+ SmallVector<Edge, 8> ExitEdges;
+ SmallVector<Edge, 8> Edges;
+ if (BBLoop) {
+ BBLoop->getExitEdges(ExitEdges);
+ }
+
+ // If this is a loop header, consider the following:
+ // Exactly the flow that is entering this block, must exit this block too. So
+ // do the following:
+ // *) get all the exit edges, read the flow that is already leaving this
+ // loop, remember the edges that do not have any flow on them right now.
+ // (The edges that already have flow on them are most likely exiting edges of
+ // other loops; do not touch those flows because the previously calculated
+ // loop headers would not be exact anymore.)
+ // *) In case there is not a single exiting edge left, create one at the loop
+ // latch to prevent the flow from building up in the loop.
+ // *) Take the flow that is not leaving the loop already and distribute it on
+ // the remaining exiting edges.
+ // (This ensures that all flow that enters the loop also leaves it.)
+ // *) Increase the flow into the loop by increasing the weight of this block.
+ // There is at least one incoming backedge that will bring us this flow later
+ // on. (So that the flow condition in this node is valid again.)
+ if (BBisHeader) {
+ double incoming = BBWeight;
+ // Subtract the flow leaving the loop.
+ std::set<Edge> ProcessedExits;
+ for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+ ee = ExitEdges.end(); ei != ee; ++ei) {
+ if (ProcessedExits.insert(*ei).second) {
+ double w = getEdgeWeight(*ei);
+ if (w == MissingValue) {
+ Edges.push_back(*ei);
+ // Check if there is a necessary minimal weight, if yes, subtract it
+ // from weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ incoming -= MinimalWeight[*ei];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ } else {
+ incoming -= w;
+ }
+ }
+ }
+ // If no exit edges, create one:
+ if (Edges.size() == 0) {
+ BasicBlock *Latch = BBLoop->getLoopLatch();
+ if (Latch) {
+ Edge edge = getEdge(Latch,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ edge = getEdge(Latch, BB);
+ EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+ printEdgeWeight(edge);
+ }
+ }
+
+ // Distribute the remaining weight to the exiting edges. To prevent fractions
+ // from building up and provoking precision problems, the weight to be
+ // distributed is split and rounded; the last edge gets a somewhat bigger
+ // value, but we are close enough for an estimation.
+ double fraction = floor(incoming/Edges.size());
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ double w = 0;
+ if (ei != (ee-1)) {
+ w = fraction;
+ incoming -= fraction;
+ } else {
+ w = incoming;
+ }
+ EdgeInformation[BB->getParent()][*ei] += w;
+ // Read necessary minimal weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+
+ // Add a minimal weight to the paths to all exit edges; this is used to
+ // ensure that enough flow reaches these edges.
+ Path p;
+ const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+ while (Dest != BB) {
+ const BasicBlock *Parent = p.find(Dest)->second;
+ Edge e = getEdge(Parent, Dest);
+ if (MinimalWeight.find(e) == MinimalWeight.end()) {
+ MinimalWeight[e] = 0;
+ }
+ MinimalWeight[e] += w;
+ DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+ Dest = Parent;
+ }
+ }
+ // Increase flow into the loop.
+ BBWeight *= (ExecCount+1);
+ }
+
+ BlockInformation[BB->getParent()][BB] = BBWeight;
+ // Up until now we considered only the loop exiting edges, now we have a
+ // definite block weight and must distribute this onto the outgoing edges.
+ // Since there may already be flow attached to some of the edges, read this
+ // flow first and remember the edges that still have no flow attached.
+ Edges.clear();
+ std::set<BasicBlock*> ProcessedSuccs;
+
+ succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // Also check for (BB,0) edges that may already contain some flow. (But only
+ // in case there are no successors.)
+ if (bbi == bbe) {
+ Edge edge = getEdge(BB,0);
+ EdgeInformation[BB->getParent()][edge] = BBWeight;
+ printEdgeWeight(edge);
+ }
+ for ( ; bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ Edge edge = getEdge(BB,*bbi);
+ double w = getEdgeWeight(edge);
+ if (w != MissingValue) {
+ BBWeight -= getEdgeWeight(edge);
+ } else {
+ Edges.push_back(edge);
+ // If a minimal weight is necessary, reserve it by subtracting it from the
+ // block weight; it is re-added later on.
+ if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+ BBWeight -= MinimalWeight[edge];
+ DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+ }
+ }
+ }
+ }
+
+ double fraction = floor(BBWeight/Edges.size());
+ // Finally we know what flow is still not leaving the block, distribute this
+ // flow onto the empty edges.
+ for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+ ei != ee; ++ei) {
+ if (ei != (ee-1)) {
+ EdgeInformation[BB->getParent()][*ei] += fraction;
+ BBWeight -= fraction;
+ } else {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight;
+ }
+ // Re-add the minimal necessary weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
+ printEdgeWeight(*ei);
+ }
+
+ // This block is visited, mark this before the recursion.
+ BBToVisit.erase(BB);
+
+ // Recurse into successors.
+ for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Fetch LoopInfo and clear ProfileInfo for this function.
+ LI = &getAnalysis<LoopInfo>();
+ FunctionInformation.erase(&F);
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ BBToVisit.clear();
+
+ // Mark all blocks as to visit.
+ for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+ BBToVisit.insert(bi);
+
+ // Clear Minimal Edges.
+ MinimalWeight.clear();
+
+ DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+
+ // Since the entry block is the first one and has no predecessors, the edge
+ // (0,entry) is inserted with the starting block weight (2^32).
+ BasicBlock *entry = &F.getEntryBlock();
+ BlockInformation[&F][entry] = pow(2.0, 32.0);
+ Edge edge = getEdge(0,entry);
+ EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+ printEdgeWeight(edge);
+
+ // Since recurseBasicBlock() may return with a block which was not fully
+ // estimated, call recurseBasicBlock() until everything is calculated.
+ bool cleanup = false;
+ recurseBasicBlock(entry);
+ while (BBToVisit.size() > 0 && !cleanup) {
+ // Remember number of open blocks, this is later used to check if progress
+ // was made.
+ unsigned size = BBToVisit.size();
+
+ // Try to calculate all blocks in turn.
+ for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+ be = BBToVisit.end(); bi != be; ++bi) {
+ recurseBasicBlock(*bi);
+ // If at least one block was finished, break because iterator may be
+ // invalid.
+ if (BBToVisit.size() < size) break;
+ }
+
+ // If there was not a single block resolved, make some assumptions.
+ if (BBToVisit.size() == size) {
+ bool found = false;
+ for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
+ (BBI != BBE) && (!found); ++BBI) {
+ BasicBlock *BB = *BBI;
+ // Try each predecessor to see if its edge can be assumed.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ (bbi != bbe) && (!found); ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ // Check that edge from predecessor is still free.
+ if (w == MissingValue) {
+ // Check if there is a cycle from this block to the predecessor.
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+ if (Dest != *bbi) {
+ // If there is no cycle, just set the edge weight to 0.
+ EdgeInformation[&F][e] = 0;
+ DEBUG(dbgs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ found = true;
+ }
+ }
+ }
+ }
+ if (!found) {
+ cleanup = true;
+ DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
+ }
+ }
+ }
+ // In case there was no safe way to assume edges, as a last measure
+ // set _everything_ to zero.
+ if (cleanup) {
+ FunctionInformation[&F] = 0;
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ const BasicBlock *BB = &(*FI);
+ BlockInformation[&F][BB] = 0;
+ const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ if (predi == prede) {
+ Edge e = getEdge(0,BB);
+ setEdgeWeight(e,0);
+ }
+ for (;predi != prede; ++predi) {
+ Edge e = getEdge(*predi,BB);
+ setEdgeWeight(e,0);
+ }
+ succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+ if (succi == succe) {
+ Edge e = getEdge(BB,0);
+ setEdgeWeight(e,0);
+ }
+ for (;succi != succe; ++succi) {
+ Edge e = getEdge(*succi,BB);
+ setEdgeWeight(e,0);
+ }
+ }
+ }
+
+ return false;
+}
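
The rounding scheme used by both distribution loops above can be isolated in a few lines. distribute below is a hypothetical helper written for illustration, mirroring the floor-and-remainder logic of recurseBasicBlock: every outgoing edge but the last gets floor(weight / edges), and the last edge absorbs the remainder so the flow condition still holds exactly.

#include <cmath>
#include <cstdio>
#include <vector>

// Split blockWeight across numEdges outgoing edges: every edge but the last
// gets floor(blockWeight / numEdges); the last edge absorbs the remainder so
// the flow condition (in-flow == block weight == out-flow) is preserved.
static std::vector<double> distribute(double blockWeight, unsigned numEdges) {
  std::vector<double> out(numEdges, 0.0);
  double fraction = std::floor(blockWeight / numEdges);
  double remaining = blockWeight;
  for (unsigned i = 0; i < numEdges; ++i) {
    if (i + 1 != numEdges) {
      out[i] = fraction;
      remaining -= fraction;
    } else {
      out[i] = remaining;  // last edge takes whatever is left
    }
  }
  return out;
}

int main() {
  std::vector<double> w = distribute(10.0, 3);  // yields 3, 3, 4
  double sum = 0;
  for (unsigned i = 0; i < w.size(); ++i) {
    std::printf("%g ", w[i]);
    sum += w[i];
  }
  std::printf("| sum = %g\n", sum);  // equals the original block weight
  return 0;
}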
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 000000000000..173de2c02791
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,1105 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+#include <queue>
+#include <limits>
+using namespace llvm;
+
+namespace llvm {
+ template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
+
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+ MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+ if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
+
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
+
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(BB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ double Count = MissingValue;
+
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // Are there zero predecessors of this block?
+ if (PI == PE) {
+ Edge e = getEdge(0, BB);
+ Count = getEdgeWeight(e);
+ } else {
+ // Otherwise, if there are predecessors, the execution count of this block is
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ Count = 0;
+ for (; PI != PE; ++PI) {
+ const BasicBlock *P = *PI;
+ if (ProcessedPreds.insert(P).second) {
+ double w = getEdgeWeight(getEdge(P, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ // If the predecessors did not suffice to get block weight, try successors.
+ if (Count == MissingValue) {
+
+ succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+ // Are there zero successors of this block?
+ if (SI == SE) {
+ Edge e = getEdge(BB,0);
+ Count = getEdgeWeight(e);
+ } else {
+ std::set<const BasicBlock*> ProcessedSuccs;
+ Count = 0;
+ for (; SI != SE; ++SI)
+ if (ProcessedSuccs.insert(*SI).second) {
+ double w = getEdgeWeight(getEdge(BB, *SI));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
+ }
+ }
+
+ if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineBasicBlock *MBB) {
+ std::map<const MachineFunction*, BlockCounts>::iterator J =
+ BlockInformation.find(MBB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(MBB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
+ std::map<const Function*, double>::iterator J =
+ FunctionInformation.find(F);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ // isDeclaration() is checked here and not at the start of the function to
+ // allow functions without a body to still have an execution count.
+ if (F->isDeclaration()) return MissingValue;
+
+ double Count = getExecutionCount(&F->getEntryBlock());
+ if (Count != MissingValue) FunctionInformation[F] = Count;
+ return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineFunction *MF) {
+ std::map<const MachineFunction*, double>::iterator J =
+ FunctionInformation.find(MF);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ double Count = getExecutionCount(&MF->front());
+ if (Count != MissingValue) FunctionInformation[MF] = Count;
+ return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ setExecutionCount(const BasicBlock *BB, double w) {
+ DEBUG(dbgs() << "Creating Block " << BB->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ setExecutionCount(const MachineBasicBlock *MBB, double w) {
+ DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+ double oldw = getEdgeWeight(e);
+ assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+ DEBUG(dbgs() << "Adding to Edge " << e
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ addExecutionCount(const BasicBlock *BB, double w) {
+ double oldw = getExecutionCount(BB);
+ assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+ DEBUG(dbgs() << "Adding to Block " << BB->getName()
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
+ J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Deleting" << e << "\n");
+ J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceEdge(const Edge &oldedge, const Edge &newedge) {
+ double w;
+ if ((w = getEdgeWeight(newedge)) == MissingValue) {
+ w = getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
+ } else {
+ w += getEdgeWeight(oldedge);
+ DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
+ }
+ setEdgeWeight(newedge,w);
+ removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+ GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+ Path &P, unsigned Mode) {
+ const BasicBlock *BB = 0;
+ bool hasFoundPath = false;
+
+ std::queue<const BasicBlock *> BFS;
+ BFS.push(Src);
+
+ while(BFS.size() && !hasFoundPath) {
+ BB = BFS.front();
+ BFS.pop();
+
+ succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ if (Succ == End) {
+ P[0] = BB;
+ if (Mode & GetPathToExit) {
+ hasFoundPath = true;
+ BB = 0;
+ }
+ }
+ for(;Succ != End; ++Succ) {
+ if (P.find(*Succ) != P.end()) continue;
+ Edge e = getEdge(BB,*Succ);
+ if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+ P[*Succ] = BB;
+ BFS.push(*Succ);
+ if ((Mode & GetPathToDest) && *Succ == Dest) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ }
+ }
+
+ return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ divertFlow(const Edge &oldedge, const Edge &newedge) {
+ DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
+
+ // First check if the old edge was taken, if not, just delete it...
+ if (getEdgeWeight(oldedge) == 0) {
+ removeEdge(oldedge);
+ return;
+ }
+
+ Path P;
+ P[newedge.first] = 0;
+ P[newedge.second] = newedge.first;
+ const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+ double w = getEdgeWeight (oldedge);
+ DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
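+ // Walk the path found by GetPath backwards to newedge.first, adding the
+ // diverted weight to every edge (and to the blocks in between) on the way.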
+ do {
+ const BasicBlock *Parent = P.find(BB)->second;
+ Edge e = getEdge(Parent,BB);
+ double oldw = getEdgeWeight(e);
+ double oldc = getExecutionCount(e.first);
+ setEdgeWeight(e, w+oldw);
+ if (Parent != oldedge.first) {
+ setExecutionCount(e.first, w+oldc);
+ }
+ BB = Parent;
+ } while (BB != newedge.first);
+ removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+ DEBUG(dbgs() << "Replacing " << RmBB->getName()
+ << " with " << DestBB->getName() << "\n");
+ const Function *F = DestBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ Edge e, newedge;
+ bool erasededge = false;
+ EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ while(I != E) {
+ e = (I++)->first;
+ bool foundedge = false; bool eraseedge = false;
+ if (e.first == RmBB) {
+ if (e.second == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
+ }
+ if (e.second == RmBB) {
+ if (e.first == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
+ }
+ if (foundedge) {
+ replaceEdge(e, newedge);
+ }
+ if (eraseedge) {
+ if (erasededge) {
+ Edge newedge = getEdge(DestBB, DestBB);
+ replaceEdge(e, newedge);
+ } else {
+ removeEdge(e);
+ erasededge = true;
+ }
+ }
+ }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it is possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
+ const Function *F = FirstBB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ // Generate edges and read current weight.
+ Edge e = getEdge(FirstBB, SecondBB);
+ Edge n1 = getEdge(FirstBB, NewBB);
+ Edge n2 = getEdge(NewBB, SecondBB);
+ EdgeWeights &ECs = J->second;
+ double w = ECs[e];
+
+ int succ_count = 0;
+ if (!MergeIdenticalEdges) {
+ // First count the edges from FirstBB to SecondBB; if there is more than
+ // one, only slice out a proportional part for NewBB.
+ for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+ BBI != BBE; ++BBI) {
+ if (*BBI == SecondBB) succ_count++;
+ }
+ // When the NewBB is completely new, increment the count by one so that
+ // the counts are properly distributed.
+ if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+ } else {
+ // When the edges are merged anyway, then redirect all flow.
+ succ_count = 1;
+ }
+
+ // We know now how many edges there are from FirstBB to SecondBB, reroute a
+ // proportional part of the edge weight over NewBB.
+ double neww = floor(w / succ_count);
+ ECs[n1] += neww;
+ ECs[n2] += neww;
+ BlockInformation[F][NewBB] += neww;
+ if (succ_count == 1) {
+ ECs.erase(e);
+ } else {
+ ECs[e] -= neww;
+ }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+ const BasicBlock* New) {
+ const Function *F = Old->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+ std::set<Edge> Edges;
+ for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+ ewi != ewe; ++ewi) {
+ Edge old = ewi->first;
+ if (old.first == Old) {
+ Edges.insert(old);
+ }
+ }
+ for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+ EI != EE; ++EI) {
+ Edge newedge = getEdge(New, EI->second);
+ replaceEdge(*EI, newedge);
+ }
+
+ double w = getExecutionCount(Old);
+ setEdgeWeight(getEdge(Old, New), w);
+ setExecutionCount(New, w);
+}
+
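+// This splitBlock overload handles the case where only the listed
+// predecessors of BB are redirected over NewBB: the weight of each (Pred,BB)
+// edge is moved to (Pred,NewBB), and the sum of the moved weight becomes both
+// the weight of the new (NewBB,BB) edge and the execution count of NewBB.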
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+ const BasicBlock* NewBB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds) {
+ const Function *F = BB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+ << " to " << NewBB->getName() << "\n");
+
+ // Collect weight that was redirected over NewBB.
+ double newweight = 0;
+
+ std::set<const BasicBlock *> ProcessedPreds;
+ // For all requested predecessors.
+ for (unsigned pred = 0; pred < NumPreds; ++pred) {
+ const BasicBlock * Pred = Preds[pred];
+ if (ProcessedPreds.insert(Pred).second) {
+ // Create edges and read old weight.
+ Edge oldedge = getEdge(Pred, BB);
+ Edge newedge = getEdge(Pred, NewBB);
+
+ // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge);
+
+ replaceEdge(oldedge,newedge);
+ }
+ }
+
+ Edge newedge = getEdge(NewBB,BB);
+ setEdgeWeight(newedge, newweight);
+ setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+ const Function *New) {
+ DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
+ << New->getName() << "\n");
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(Old);
+ if(J != EdgeInformation.end()) {
+ EdgeInformation[New] = J->second;
+ }
+ EdgeInformation.erase(Old);
+ BlockInformation.erase(Old);
+ FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+ ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+ if (w == ProfileInfo::MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ return 0;
+ } else {
+ return w;
+ }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+ CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+ bool assumeEmptySelf) {
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+ // Collect weights of all incoming and outgoing edges; remember edges that
+ // have no value.
+ double incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ Edge e = getEdge(0,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ Edge e = getEdge(*bbi,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ }
+
+ double outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ Edge e = getEdge(BB,0);
+ if (getEdgeWeight(e) == MissingValue) {
+ double w = getExecutionCount(BB);
+ if (w != MissingValue) {
+ setEdgeWeight(e,w);
+ removed = e;
+ }
+ }
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ Edge e = getEdge(BB,*sbbi);
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ }
+
+ // if exactly one edge weight was missing, calculate it and remove it from
+ // spanning tree
+ if (uncalculated == 0 ) {
+ return true;
+ } else
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+ removed = edgetocalc;
+ return true;
+ } else
+ if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
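+ // A missing self-edge is seen twice, once as an incoming and once as an
+ // outgoing edge. When it is the only unknown and the remaining flow already
+ // balances, estimate its weight as ten times the incoming flow.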
+ setEdgeWeight(edgetocalc, incount * 10);
+ removed = edgetocalc;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+ double w = PI->getEdgeWeight(e);
+ if (w != ProfileInfo::MissingValue) {
+ calcw += w;
+ } else {
+ misscount.insert(e);
+ }
+}
+
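+// EstimateMissingEdges - If all incoming edge weights of BB are known, spread
+// the incoming flow evenly over the outgoing edges that are still missing
+// (e.g. a block entered 8 times with two unknown outgoing edges gets 4 on
+// each of them). Missing incoming edges are only resolved, to zero, when no
+// flow leaves the block at all.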
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+ double inWeight = 0;
+ std::set<Edge> inMissing;
+ std::set<const BasicBlock*> ProcessedPreds;
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi == bbe) {
+ readEdge(this,getEdge(0,BB),inWeight,inMissing);
+ }
+ for( ; bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+ }
+ }
+
+ double outWeight = 0;
+ std::set<Edge> outMissing;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi == sbbe)
+ readEdge(this,getEdge(BB,0),outWeight,outMissing);
+ for ( ; sbbi != sbbe; ++sbbi ) {
+ if (ProcessedSuccs.insert(*sbbi).second) {
+ readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+ }
+ }
+
+ double share;
+ std::set<Edge>::iterator ei,ee;
+ if (inMissing.size() == 0 && outMissing.size() > 0) {
+ ei = outMissing.begin();
+ ee = outMissing.end();
+ share = inWeight/outMissing.size();
+ setExecutionCount(BB,inWeight);
+ } else
+ if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+ ei = inMissing.begin();
+ ee = inMissing.end();
+ share = 0;
+ setExecutionCount(BB,0);
+ } else
+ if (inMissing.size() == 0 && outMissing.size() == 0) {
+ setExecutionCount(BB,outWeight);
+ return true;
+ } else {
+ return false;
+ }
+ for ( ; ei != ee; ++ei ) {
+ setEdgeWeight(*ei,share);
+ }
+ return true;
+}
+
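+// repair - Try to make the profile of F consistent again after the CFG has
+// changed. A series of increasingly speculative heuristics is applied to the
+// unresolved blocks (flow conservation per block, proportional estimates,
+// paths to blocks with known counts, self-loops, backedges, return paths and
+// finally speculative zero weights) until everything is resolved; if no
+// heuristic makes progress the function asserts.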
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+// for (Function::const_iterator FI = F->begin(), FE = F->end();
+// FI != FE; ++FI) {
+// const BasicBlock* BB = &(*FI);
+// {
+// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// {
+// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// }
+// return;
+// }
+ // The set of BasicBlocks that are still unvisited.
+ std::set<const BasicBlock*> Unvisited;
+
+ // The set of return edges (Edges with no successors).
+ std::set<Edge> ReturnEdges;
+ double ReturnWeight = 0;
+
+ // First iterate over the whole function and collect:
+ // 1) The blocks in this function in the Unvisited set.
+ // 2) The return edges in the ReturnEdges set.
+ // 3) The flow that is leaving the function already via return edges.
+
+ // Data structure for searching the function.
+ std::queue<const BasicBlock *> BFS;
+ const BasicBlock *BB = &(F->getEntryBlock());
+ BFS.push(BB);
+ Unvisited.insert(BB);
+
+ while (BFS.size()) {
+ BB = BFS.front(); BFS.pop();
+ succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ if (NBB == End) {
+ Edge e = getEdge(BB,0);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ // If the return edge has no value, try to read value from block.
+ double bw = getExecutionCount(BB);
+ if (bw != MissingValue) {
+ setEdgeWeight(e,bw);
+ ReturnWeight += bw;
+ } else {
+ // If both return edge and block provide no value, collect edge.
+ ReturnEdges.insert(e);
+ }
+ } else {
+ // If the return edge has a proper value, collect it.
+ ReturnWeight += w;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (Unvisited.insert(*NBB).second) {
+ BFS.push(*NBB);
+ }
+ }
+ }
+
+ while (Unvisited.size() > 0) {
+ unsigned oldUnvisitedCount = Unvisited.size();
+ bool FoundPath = false;
+
+ // If there is only one edge left, calculate it.
+ if (ReturnEdges.size() == 1) {
+ ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+ Edge e = *ReturnEdges.begin();
+ setEdgeWeight(e,ReturnWeight);
+ setExecutionCount(e.first,ReturnWeight);
+
+ Unvisited.erase(e.first);
+ ReturnEdges.erase(e);
+ continue;
+ }
+
+ // Calculate all blocks where only one edge is missing; this may also
+ // resolve further return edges.
+ std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ Edge e;
+ if(CalculateMissingEdge(BB,e,true)) {
+ if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+ setExecutionCount(BB,getExecutionCount(BB));
+ }
+ Unvisited.erase(BB);
+ if (e.first != 0 && e.second == 0) {
+ ReturnEdges.erase(e);
+ ReturnWeight += getEdgeWeight(e);
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Estimate edge weights by dividing the flow proportionally.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ bool AllEdgesHaveSameReturn = true;
+ // Check each successor; these must all end up in the same or an empty
+ // return block, otherwise it is dangerous to do an estimation on them.
+ for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ Succ != End; ++Succ) {
+ Path P;
+ GetPath(*Succ, 0, P, GetPathToExit);
+ if (Dest && Dest != P[0]) {
+ AllEdgesHaveSameReturn = false;
+ }
+ Dest = P[0];
+ }
+ if (AllEdgesHaveSameReturn) {
+ if(EstimateMissingEdges(BB)) {
+ Unvisited.erase(BB);
+ break;
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Check if there is a path to a block that has a known value and redirect
+ // flow accordingly.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ // Fetch path.
+ const BasicBlock *BB = *FI; ++FI;
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+ // Calculate incoming flow.
+ double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ invalid++;
+ } else {
+ // If the path found from BB already contains this predecessor, the edge
+ // is a backedge; do not count it as missing.
+ if (P.find(*NBB) == P.end())
+ inmissing++;
+ }
+ incount++;
+ }
+ }
+ if (inmissing == incount) continue;
+ if (invalid == 0) continue;
+
+ // Subtract (already) outgoing flow.
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw -= ew;
+ }
+ }
+ }
+ if (iw < 0) continue;
+
+ // Check the receiving end of the path if it can handle the flow.
+ double ow = getExecutionCount(Dest);
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ ow -= ew;
+ }
+ }
+ }
+ if (ow < 0) continue;
+
+ // Determine how much flow shall be used.
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+ if (ew != MissingValue) {
+ ew = ew<ow?ew:ow;
+ ew = ew<iw?ew:iw;
+ } else {
+ if (inmissing == 0)
+ ew = iw;
+ }
+
+ // Create flow.
+ if (ew != MissingValue) {
+ do {
+ Edge e = getEdge(P[Dest],Dest);
+ if (getEdgeWeight(e) == MissingValue) {
+ setEdgeWeight(e,ew);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Calculate a block with self loop.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ bool SelfEdgeFound = false;
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (*NBB == BB) {
+ SelfEdgeFound = true;
+ break;
+ }
+ }
+ if (SelfEdgeFound) {
+ Edge e = getEdge(BB,BB);
+ if (getEdgeWeight(e) == MissingValue) {
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
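+ // Heuristic: give the unknown self-loop ten times the weight that flows
+ // into the block.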
+ setEdgeWeight(e,iw * 10);
+ FoundPath = true;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ Path P;
+ bool BackEdgeFound = false;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+ if (Dest == *NBB) {
+ BackEdgeFound = true;
+ break;
+ }
+ }
+ if (BackEdgeFound) {
+ Edge e = getEdge(Dest,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Channel flow to return block.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+ Dest = P[0];
+ if (!Dest) continue;
+
+ if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+ // Calculate incoming flow.
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,iw);
+ FoundPath = true;
+ } else {
+ assert(0 && "Edge should not have value already!");
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Speculatively set edges to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Edge e = getEdge(*NBB,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ break;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ errs() << "{";
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ dbgs() << BB->getName();
+ if (FI != FE)
+ dbgs() << ",";
+ }
+ errs() << "}";
+
+ errs() << "ASSERT: could not repair function";
+ assert(0 && "could not repair function");
+ }
+
+ EdgeWeights J = EdgeInformation[F];
+ for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+ Edge e = EI->first;
+
+ bool SuccFound = false;
+ if (e.first != 0) {
+ succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+ if (NBB == End) {
+ if (0 == e.second) {
+ SuccFound = true;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (*NBB == e.second) {
+ SuccFound = true;
+ break;
+ }
+ }
+ if (!SuccFound) {
+ removeEdge(e);
+ }
+ }
+ }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+ return O << F->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+ return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+ return O << BB->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+ return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// NoProfile ProfileInfo implementation
+//
+
+namespace {
+ struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoProfileInfo() : ImmutablePass(ID) {
+ initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 000000000000..eaa38dad16a1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,157 @@
+//===- ProfileInfoLoader.cpp - Load profile information from disk ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+ if (!Really) return Var;
+ return ((Var & (255U<< 0U)) << 24U) |
+ ((Var & (255U<< 8U)) << 8U) |
+ ((Var & (255U<<16U)) >> 8U) |
+ ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+ // If either value is undefined, use the other.
+ if (A == ProfileInfoLoader::Uncounted) return B;
+ if (B == ProfileInfoLoader::Uncounted) return A;
+ return A + B;
+}
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ std::vector<unsigned> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries;
+ if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+ // Read the counts...
+ std::vector<unsigned> TempSpace(NumEntries);
+
+ // Read in the block of data...
+ if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+ errs() << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+
+ // Make sure we have enough space... The space is initialised to -1 to
+ // facilitate the loading of missing values for OptimalEdgeProfiling.
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
+
+ // Accumulate the data we just read into the data.
+ if (!ShouldByteSwap) {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(TempSpace[i], Data[i]);
+ }
+ } else {
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+ }
+ }
+}
+
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+ const std::string &Filename,
+ Module &TheModule) :
+ Filename(Filename),
+ M(TheModule), Warned(false) {
+ FILE *F = fopen(Filename.c_str(), "rb");
+ if (F == 0) {
+ errs() << ToolName << ": Error opening '" << Filename << "': ";
+ perror(0);
+ exit(1);
+ }
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+ switch (PacketType) {
+ case ArgumentInfo: {
+ unsigned ArgLength;
+ if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
+
+ // Read in the arguments...
+ std::vector<char> Chars(ArgLength+4);
+
+ if (ArgLength)
+ if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
+ errs() << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
+ break;
+ }
+
+ case FunctionInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
+ break;
+
+ case BlockInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ case OptEdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
+ break;
+
+ case BBTraceInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
+ break;
+
+ default:
+ errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ exit(1);
+ }
+ }
+
+ fclose(F);
+}
+
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
new file mode 100644
index 000000000000..098079bcffc4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -0,0 +1,267 @@
+//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// loads the information from a profile dump file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-loader"
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+ class LoaderPass : public ModulePass, public ProfileInfo {
+ std::string Filename;
+ std::set<Edge> SpanningTree;
+ std::set<const BasicBlock*> BBisUnvisited;
+ unsigned ReadCount;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit LoaderPass(const std::string &filename = "")
+ : ModulePass(ID), Filename(filename) {
+ initializeLoaderPassPass(*PassRegistry::getPassRegistry());
+ if (filename.empty()) Filename = ProfileInfoFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information loader";
+ }
+
+ // recurseBasicBlock() - Calculates the edge weights for as many basic
+ // blocks as possible.
+ virtual void recurseBasicBlock(const BasicBlock *BB);
+ virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
+ virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
+ return (ProfileInfo*)this;
+ return this;
+ }
+
+ /// run - Load the profile information from the specified file.
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false)
+
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+ return new LoaderPass(Filename);
+}
+
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+ unsigned &uncalc, double &count) {
+ double w;
+ if ((w = getEdgeWeight(edge)) == MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ } else {
+ count+=w;
+ }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+ // break recursion if already visited
+ if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+ BBisUnvisited.erase(BB);
+ if (!BB) return;
+
+ for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi) {
+ recurseBasicBlock(*bbi);
+ }
+
+ Edge tocalc;
+ if (CalculateMissingEdge(BB, tocalc)) {
+ SpanningTree.erase(tocalc);
+ }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+ std::vector<unsigned> &ECs) {
+ if (ReadCount < ECs.size()) {
+ double weight = ECs[ReadCount++];
+ if (weight != ProfileInfoLoader::Uncounted) {
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also representable
+ // in double.
+ EdgeInformation[getFunction(e)][e] += (double)weight;
+
+ DEBUG(dbgs() << "--Read Edge Counter for " << e
+ << " (# "<< (ReadCount-1) << "): "
+ << (unsigned)getEdgeWeight(e) << "\n");
+ } else {
+ // This happens only if reading optimal profiling information, not when
+ // reading regular profiling information.
+ SpanningTree.insert(e);
+ }
+ }
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+ ProfileInfoLoader PIL("profile-loader", Filename, M);
+
+ EdgeInformation.clear();
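+ // The raw counters are consumed in a fixed order that has to match the
+ // instrumented program: per function the (0,entry) edge first, then one
+ // counter for every terminator successor of each block.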
+ std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ Counters = PIL.getRawOptimalEdgeCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ readEdge(getEdge(BB,0), Counters);
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+ }
+ }
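+ // Edges left uncounted by optimal edge profiling form a spanning tree;
+ // recover their weights from the counted edges by repeatedly applying flow
+ // conservation at the blocks they touch.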
+ while (SpanningTree.size() > 0) {
+
+ unsigned size = SpanningTree.size();
+
+ BBisUnvisited.clear();
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ BBisUnvisited.insert(ei->first);
+ BBisUnvisited.insert(ei->second);
+ }
+ while (BBisUnvisited.size() > 0) {
+ recurseBasicBlock(*BBisUnvisited.begin());
+ }
+
+ if (SpanningTree.size() == size) {
+ DEBUG(dbgs()<<"{");
+ for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+ ee = SpanningTree.end(); ei != ee; ++ei) {
+ DEBUG(dbgs()<< *ei <<",");
+ }
+ assert(0 && "No edge calculated!");
+ }
+
+ }
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ NumEdgesRead = ReadCount;
+ }
+
+ BlockInformation.clear();
+ Counters = PIL.getRawBlockCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also
+ // representable in double.
+ BlockInformation[F][BB] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ FunctionInformation.clear();
+ Counters = PIL.getRawFunctionCounts();
+ if (Counters.size() > 0) {
+ ReadCount = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ if (ReadCount < Counters.size())
+ // Here the data realm changes from the unsigned of the file to the
+ // double of the ProfileInfo. This conversion is safe because we know
+ // that everything that is representable in unsigned is also
+ // representable in double.
+ FunctionInformation[F] = (double)Counters[ReadCount++];
+ }
+ if (ReadCount != Counters.size()) {
+ errs() << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 000000000000..a01751849c51
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,382 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to verify profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool,false>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+ cl::desc("Disable assertions"));
+
+namespace llvm {
+ template<class FType, class BType>
+ class ProfileVerifierPassT : public FunctionPass {
+
+ struct DetailedBlockInfo {
+ const BType *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
+ };
+
+ ProfileInfoT<FType, BType> *PI;
+ std::set<const BType*> BBisVisited;
+ std::set<const FType*> FisVisited;
+ bool DisableAssertions;
+
+ // When debugging is enabled, the verifier prints a whole slew of debug
+ // information, otherwise it is just the assert. These are all the helper
+ // functions.
+ bool PrintedDebugTree;
+ std::set<const BType*> BBisPrinted;
+ void debugEntry(DetailedBlockInfo*);
+ void printDebugInfo(const BType *BB);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ DisableAssertions = ProfileVerifierDisableAssertions;
+ }
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
+ DisableAssertions(da) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ProfileInfoT<FType, BType> >();
+ }
+
+ const char *getPassName() const {
+ return "Profiling information verifier";
+ }
+
+ /// run - Verify the profile information.
+ bool runOnFunction(FType &F);
+ void recurseBasicBlock(const BType*);
+
+ bool exitReachable(const FType*);
+ double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
+ void CheckValue(bool, const char*, DetailedBlockInfo*);
+ };
+
+ typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BType*> ProcessedPreds;
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated in-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
+ }
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BType*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ dbgs() << "calculated out-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ dbgs() << "Block " << BB->getNameStr() << " in "
+ << BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",BBWeight) << ","
+ << "inWeight=" << format("%20.20g",inWeight) << ","
+ << "inCount=" << inCount << ","
+ << "outWeight=" << format("%20.20g",outWeight) << ","
+ << "outCount=" << outCount << "\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+ dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
+ << "inWeight=" << format("%20.20g",DI->inWeight) << ","
+ << "inCount=" << DI->inCount << ","
+ << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+ << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
+ }
+
+ // This compares A and B for equality.
+ static bool Equals(double A, double B) {
+ return A == B;
+ }
+
+ // This checks if the function "exit" is reachable from a given function
+ // via calls; this is necessary to check if a profile is valid despite the
+ // counts not fitting exactly.
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+ if (!F) return false;
+
+ if (FisVisited.count(F)) return false;
+
+ FType *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
+
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ exits |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ exits = true;
+ }
+ if (exits) break;
+ }
+ }
+ return exits;
+ }
+
+ #define ASSERTMESSAGE(M) \
+ { dbgs() << "ASSERT:" << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M)); }
+
+ template<class FType, class BType>
+ double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has missing value");
+ return 0;
+ } else {
+ if (EdgeWeight < 0) {
+ dbgs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has negative value");
+ }
+ return EdgeWeight;
+ }
+ }
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
+ const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ dbgs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
+ }
+
+ // This calculates the Information for a block and then recurses into the
+ // successors.
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information, this can then be handed
+ // to debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BType*> ProcessedPreds;
+ const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
+ DI.inCount++;
+ }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
+
+ // Read successors.
+ std::set<const BType*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // If there is a (0,BB) edge, consider it too. (This is done not only when
+ // there are no successors, but every time; not every function contains
+ // return blocks with no successors (think loop latch as return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfoT<FType, BType>::MissingValue) {
+ DI.outWeight += w;
+ DI.outCount++;
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
+
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+ "BasicBlock has missing value", &DI);
+ CheckValue(DI.BBWeight < 0,
+ "BasicBlock has negative value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F && (F->getName() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
+ }
+ }
+ }
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ isExitReachable = true;
+ }
+ if (isExitReachable) break;
+ }
+ }
+ }
+
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ }
+
+
+ // Mark this block as visited, then recurse into the successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
+ }
+
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+ PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+ if (!PI)
+ ASSERTMESSAGE("No ProfileInfo available");
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BType *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+ ASSERTMESSAGE("Function count and entry block count do not match");
+
+ return false;
+ }
+
+ template<class FType, class BType>
+ char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 000000000000..52753cbe85af
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,851 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+ cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(Region::PrintNone, "none", "print no details"),
+ clEnumValN(Region::PrintBB, "bb",
+ "print regions in detail with block_iterator"),
+ clEnumValN(Region::PrintRN, "rn",
+ "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+void Region::replaceEntry(BasicBlock *BB) {
+ entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+ assert(exit && "No exit to replace!");
+ exit = BB;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
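+ // BB is in the region if the entry dominates it, except when BB is also
+ // dominated by the exit and the exit is dominated by the entry; in that
+ // case BB lies behind the region's single exit.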
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are elements of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+BasicBlock *Region::getEnteringBlock() const {
+ BasicBlock *entry = getEntry();
+ BasicBlock *Pred;
+ BasicBlock *enteringBlock = 0;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (enteringBlock)
+ return 0;
+
+ enteringBlock = Pred;
+ }
+ }
+
+ return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+ BasicBlock *exit = getExit();
+ BasicBlock *Pred;
+ BasicBlock *exitingBlock = 0;
+
+ if (!exit)
+ return 0;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI) {
+ Pred = *PI;
+ if (contains(Pred)) {
+ if (exitingBlock)
+ return 0;
+
+ exitingBlock = Pred;
+ }
+ }
+
+ return exitingBlock;
+}
+
+bool Region::isSimple() const {
+ return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ entryName = OS.str();
+ } else
+ entryName = getEntry()->getNameStr();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ exitName = OS.str();
+ } else
+ exitName = getExit()->getNameStr();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when user wants to, otherwise this expensive
+ // check will be invoked by PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::block_iterator Region::block_begin() {
+ return GraphTraits<FlatIt<Region*> >::nodes_begin(this);
+}
+
+Region::block_iterator Region::block_end() {
+ return GraphTraits<FlatIt<Region*> >::nodes_end(this);
+}
+
+Region::const_block_iterator Region::block_begin() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_begin(this);
+}
+
+Region::const_block_iterator Region::block_end() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ assert(std::find(begin(), end(), SubRegion) == children.end()
+ && "Subregion already exists!");
+
+ SubRegion->parent = this;
+ children.push_back(SubRegion);
+
+ if (!moveChildren)
+ return;
+
+ assert(SubRegion->children.size() == 0
+ && "SubRegions that contain children are not supported");
+
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSubRegion()) {
+ BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+ if (SubRegion->contains(BB))
+ RI->setRegionFor(BB, SubRegion);
+ }
+
+ std::vector<Region*> Keep;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (SubRegion->contains(*I) && *I != SubRegion) {
+ SubRegion->children.push_back(*I);
+ (*I)->parent = SubRegion;
+ } else
+ Keep.push_back(*I);
+
+ children.clear();
+ children.insert(children.begin(), Keep.begin(), Keep.end());
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+  assert(I != children.end() && "Region does not exist. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+Region *Region::getExpandedRegion() const {
+ unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+ if (NumSuccessors == 0)
+ return NULL;
+
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(getEntry(), *PI))
+ return NULL;
+
+ Region *R = RI->getRegionFor(exit);
+
+ if (R->getEntry() != exit) {
+ if (exit->getTerminator()->getNumSuccessors() == 1)
+ return new Region(getEntry(), *succ_begin(exit), RI, DT);
+ else
+ return NULL;
+ }
+
+ while (R->getParent() && R->getParent()->getEntry() == exit)
+ R = R->getParent();
+
+ if (!DT->dominates(getEntry(), R->getExit()))
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(R->getExit(), *PI))
+ return NULL;
+
+ return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
+ enum PrintStyle Style) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (Style != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (Style == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ","
+ } else if (Style == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+        OS << **I << ", "; // TODO: remove the last ","
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1, Style);
+
+ if (Style != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+ print(dbgs(), true, getDepth(), printStyle.getValue());
+}
+
+void Region::clearNodeCache() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+ IE = BBNodeMap.end(); I != IE; ++I)
+ delete I->second;
+
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+    // We found a region e that starts at exit. Therefore (entry, e->second)
+    // is also a region that is larger than (entry, exit). Insert the
+    // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
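
The chaining above is easier to see in isolation. The following standalone sketch models the ShortCut map with plain std::string keys instead of BasicBlock pointers; the block names and the three-block chain are invented for illustration and are not part of the LLVM sources.

    #include <iostream>
    #include <map>
    #include <string>

    typedef std::map<std::string, std::string> BBtoBBMap;

    // Mirrors RegionInfo::insertShortCut: if a region already starts at
    // 'exit', record the larger region (entry, shortcut-of-exit) instead.
    static void insertShortCut(const std::string &entry,
                               const std::string &exit, BBtoBBMap &ShortCut) {
      BBtoBBMap::iterator e = ShortCut.find(exit);
      if (e == ShortCut.end())
        ShortCut[entry] = exit;       // No further region at exit.
      else
        ShortCut[entry] = e->second;  // Chain to the larger region's exit.
    }

    int main() {
      BBtoBBMap ShortCut;
      // Suppose regions (C, D) and then (B, C) were found, in that order.
      insertShortCut("C", "D", ShortCut);
      insertShortCut("B", "C", ShortCut);
      // B now shortcuts directly to D, so a later scan starting above B can
      // skip the whole linear chain B -> C -> D in one step.
      std::cout << "B -> " << ShortCut["B"] << "\n";  // prints "B -> D"
      return 0;
    }
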
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
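
The post-order strategy of scanForRegions above can be illustrated without the LLVM dominator-tree machinery. The sketch below walks a hand-rolled tree in post order, so children (the small, inner regions) are handled before their ancestors; the Node struct and the block names are invented stand-ins, not real DomTreeNodes.

    #include <iostream>
    #include <string>
    #include <vector>

    struct Node {
      std::string Name;
      std::vector<Node*> Children;
    };

    static void postOrder(Node *N) {
      for (size_t i = 0; i < N->Children.size(); ++i)
        postOrder(N->Children[i]);
      // Children (the "small regions at the bottom") are visited first.
      std::cout << N->Name << " ";
    }

    int main() {
      Node Body, Latch, Header, Entry;
      Body.Name = "body";     Latch.Name = "latch";
      Header.Name = "header"; Entry.Name = "entry";
      Header.Children.push_back(&Body);
      Header.Children.push_back(&Latch);
      Entry.Children.push_back(&Header);
      postOrder(&Entry);   // prints: body latch header entry
      std::cout << "\n";
      return 0;
    }
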
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+    Region *newRegion = it->second;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+  // ShortCut is a map in which, for every BB, the exit of the largest region
+  // starting at BB is stored. These regions can be treated as single BBs.
+  // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0, printStyle.getValue());
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+  // a region pass (marked PreservedAll) finishes.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I=
+ BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+ BBtoRegion[BB] = R;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+ Region *R = getRegionFor(OldBB);
+
+ setRegionFor(NewBB, R);
+
+ while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+ R->replaceEntry(NewBB);
+ R = R->getParent();
+ }
+
+ setRegionFor(OldBB, R);
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, so that they can be used
+// from "include/llvm/LinkAllPasses.h". Otherwise the pass would be removed
+// by link time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..80eda79d8a42
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// Most of this code has been copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = NULL;
+ CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and push all regions into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+ RQ.push_back(R);
+ for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+ addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+  // Print the region tree after all passes have run.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
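
The worklist discipline of the loop above (take the region at the back of the deque, run the passes, pop it, and optionally re-queue it) is summarized by this toy sketch. Plain ints stand in for Region pointers and the request to redo region 2 is hard-coded; none of this is real pass-manager code.

    #include <deque>
    #include <iostream>

    int main() {
      std::deque<int> RQ;
      RQ.push_back(1); RQ.push_back(2); RQ.push_back(3);

      bool redoneTwo = false;
      while (!RQ.empty()) {
        int Current = RQ.back();
        bool redoThisRegion = (Current == 2 && !redoneTwo);
        if (redoThisRegion) redoneTwo = true;

        std::cout << "running passes on region " << Current << "\n";

        RQ.pop_back();            // Pop after all passes ran on Current...
        if (redoThisRegion)
          RQ.push_back(Current);  // ...unless a pass asked to redo it.
      }
      return 0;                   // processes 3, 2, 2 (redone), then 1
    }
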
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+ Out << Banner;
+ for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+ I != E; ++I)
+ (*I)->getEntry()->print(Out);
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// does not preserve higher level analysis info used by other
+// RGPassManager passes. In such a case, pop the RGPassManager from the
+// stack. This will force assignPassManager() to create a new
+// RGPassManager as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by the RGPassManager, then do not
+  // insert this pass into the current RGPassManager. Use a new RGPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Region Pass Manager
+ RGPM = new RGPassManager(PMD->getDepth() + 1);
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 000000000000..a1730b0a3ca1
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,220 @@
+//===- RegionPrinter.cpp - Print region tree pass -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegions - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ std::string getEdgeAttributes(RegionNode *srcNode,
+ GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
+
+ RegionNode *destNode = *CI;
+
+ if (srcNode->isSubRegion() || destNode->isSubRegion())
+ return "";
+
+ // In case of a backedge, do not use it to define the layout of the nodes.
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+ BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+ Region *R = RI->getRegionFor(destBB);
+
+ while (R && R->getParent())
+ if (R->getParent()->getEntry() == destBB)
+ R = R->getParent();
+ else
+ break;
+
+ if (R->getEntry() == destBB && R->contains(srcBB))
+ return "constraint=false";
+
+ return "";
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>();
+ if (RI->getRegionFor(BB) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+ }
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
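
To make the output of printRegionCluster above more concrete, here is a small standalone program that emits the same kind of nested "subgraph cluster_..." text for an invented two-level region nest; the ids, the fixed filled style, and the colors are illustrative only.

    #include <iostream>

    static void indent(std::ostream &O, unsigned depth) {
      for (unsigned i = 0; i < 2 * depth; ++i)
        O << ' ';
    }

    // Emit one cluster, optionally containing a single child cluster.
    static void emitCluster(std::ostream &O, unsigned id, unsigned depth,
                            bool hasChild) {
      indent(O, depth);     O << "subgraph cluster_R" << id << " {\n";
      indent(O, depth + 1); O << "label = \"\";\n";
      indent(O, depth + 1); O << "style = filled;\n";
      indent(O, depth + 1); O << "color = " << (depth * 2 % 12 + 1) << "\n";
      if (hasChild)
        emitCluster(O, id + 1, depth + 1, false);
      indent(O, depth);     O << "}\n";
    }

    int main() {
      // One region containing one subregion, e.g. a loop nested in a function.
      emitCluster(std::cout, 0, 0, true);
      return 0;
    }
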
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionViewer::ID = 0;
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyViewer::ID = 0;
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+} //end anonymous namespace
+
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true)
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 000000000000..025718e09feb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,6432 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalar-evolution"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolution::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+
+void SCEV::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void SCEV::print(raw_ostream &OS) const {
+ switch (getSCEVType()) {
+ case scConstant:
+ WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->getNoWrapFlags(FlagNUW))
+ OS << "nuw><";
+ if (AR->getNoWrapFlags(FlagNSW))
+ OS << "nsw><";
+ if (AR->getNoWrapFlags(FlagNW) &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+ OS << "nw><";
+ WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = 0;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (llvm::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ const Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ WriteAsOperand(OS, FieldNo, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ WriteAsOperand(OS, U->getValue(), false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+const Type *SCEV::getType() const {
+ switch (getSCEVType()) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return 0;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+bool SCEV::isAllOnesValue() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isAllOnesValue();
+ return false;
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() :
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scConstant);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
+}
+
+const SCEV *
+ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
+ const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
+}
+
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
+ unsigned SCEVTy, const SCEV *op, const Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, const Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot sign extend non-integer value!");
+}
+
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
+bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getNumOperands() == 2)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+ if (CI->isOne()) {
+ AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType();
+ return true;
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked() &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ AllocTy = STy->getElementType(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getOperand(1)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ // Ignore vector types here so that ScalarEvolutionExpander doesn't
+ // emit getelementptrs that index into vectors.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ CTy = Ty;
+ FieldNo = CE->getOperand(2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+ /// than the complexity of the RHS. This comparator is used to canonicalize
+ /// expressions.
+ class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+ public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+
+    // Return true if LHS is less than RHS, false if LHS is at least RHS.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
+ }
+
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ return 0;
+ }
+
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
+
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ break;
+ }
+
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+ }
+ };
+}
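
The reason compare() returns a three-way result while operator() returns a bool can be seen with a much smaller comparator. The sketch below sorts a stand-in Expr struct first by a "kind" and then by a value, reusing one three-way compare() for both jobs; the struct and the sample data are invented and have nothing to do with real SCEVs.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct Expr { int Kind; int Value; };

    struct ComplexityCompare {
      // Negative / zero / positive, like SCEVComplexityCompare::compare.
      int compare(const Expr &L, const Expr &R) const {
        if (L.Kind != R.Kind)
          return L.Kind - R.Kind;  // Primarily sort by kind ("getSCEVType").
        return L.Value - R.Value;  // Tie-break so the order is deterministic.
      }
      bool operator()(const Expr &L, const Expr &R) const {
        return compare(L, R) < 0;  // The binary predicate stable_sort wants.
      }
    };

    int main() {
      Expr A = {2, 7}, B = {1, 9}, C = {2, 3};
      std::vector<Expr> Ops;
      Ops.push_back(A); Ops.push_back(B); Ops.push_back(C);
      std::stable_sort(Ops.begin(), Ops.end(), ComplexityCompare());
      for (size_t i = 0; i < Ops.size(); ++i)
        std::cout << "(" << Ops[i].Kind << "," << Ops[i].Value << ") ";
      std::cout << "\n";  // prints: (1,9) (2,3) (2,7)
      return 0;
    }
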
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
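
The grouping loop above is subtle enough to deserve a tiny model. In the sketch below, (complexity, identity) pairs stand in for SCEV pointers that are already sorted by complexity, and the same swap-and-advance logic moves the duplicate 'a' next to the first one without ever looking at object addresses; the data is invented.

    #include <iostream>
    #include <utility>
    #include <vector>

    typedef std::pair<int, char> Op;  // (complexity, identity) stand-in

    // Mirrors the grouping loop in GroupByComplexity; assumes Ops.size() >= 3,
    // just as GroupByComplexity handles the smaller cases before this loop.
    static void groupDuplicates(std::vector<Op> &Ops) {
      unsigned e = Ops.size();
      for (unsigned i = 0; i != e - 2; ++i) {
        Op S = Ops[i];
        int Complexity = S.first;
        for (unsigned j = i + 1; j != e && Ops[j].first == Complexity; ++j) {
          if (Ops[j] == S) {                // Found a duplicate.
            std::swap(Ops[i + 1], Ops[j]);  // Move it right after element i.
            ++i;
            if (i == e - 2) return;
          }
        }
      }
    }

    int main() {
      std::vector<Op> Ops;
      Ops.push_back(Op(1, 'a'));  // already sorted by complexity;
      Ops.push_back(Op(1, 'b'));  // 'a' occurs twice, but not adjacently
      Ops.push_back(Op(1, 'a'));
      Ops.push_back(Op(2, 'c'));

      groupDuplicates(Ops);

      for (unsigned i = 0; i != Ops.size(); ++i)
        std::cout << Ops[i].second << ' ';
      std::cout << '\n';          // prints: a a b c
      return 0;
    }
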
+
+
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assume, K > 0.
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose, W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+  // but this formula requires less than W + K bits. Also, the first formula
+  // requires a division step, whereas this formula only requires multiplies
+  // and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+ // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
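
As a numeric sanity check of the "exact division by an odd number via its multiplicative inverse" trick described above, the following standalone program computes BC(10, 3) = 120 at W = 32 bits. The choice of It = 10 and K = 3, and the little Newton-iteration inverse, are illustrative only.

    #include <iostream>
    #include <stdint.h>

    // Inverse of an odd number modulo 2^32, via Newton iteration.
    static uint32_t inverseMod2_32(uint32_t a) {
      uint32_t x = a;                    // correct to 3 bits for any odd a
      for (int i = 0; i < 5; ++i)
        x *= 2 - a * x;                  // each step doubles the good bits
      return x;
    }

    int main() {
      const uint32_t It = 10, K = 3;

      // K! = 6 = 2^T * OddFactorial with T = 1 and OddFactorial = 3.
      const uint32_t T = 1, OddFactorial = 3;

      // It * (It - 1) * ... * (It - K + 1), computed modulo 2^32.
      uint32_t Dividend = 1;
      for (uint32_t i = 0; i != K; ++i)
        Dividend *= It - i;              // 10 * 9 * 8 = 720

      uint32_t DivResult = Dividend >> T;  // exact division by 2^T
      uint32_t BC = DivResult * inverseMod2_32(OddFactorial);

      std::cout << "BC(10, 3) = " << BC << "\n";  // prints BC(10, 3) = 120
      return 0;
    }
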
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for binomial coefficient.
+///
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
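+
+// As an illustration (arbitrary example values): evaluating {0,+,3,+,2} at
+// iteration It yields 0 + 3*BC(It,1) + 2*BC(It,2) = 3*It + It*(It-1)
+// = It*It + 2*It, which matches summing the first It step values 3, 5, 7, ...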
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // If the input value is a chrec scev, truncate the chrec's operands.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ }
+
+ // As a special case, fold trunc(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
+ SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
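+
+// A rough usage sketch (X is a hypothetical i8 SCEV): truncating
+// (zext i8 X to i32) to i16 hits the zext case above and becomes
+// getTruncateOrZeroExtend(X, i16), i.e. (zext i8 X to i16), so no explicit
+// SCEVTruncateExpr node is created for it.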
+
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNUW))
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+        // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, SMul);
+ OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+        // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
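+
+// As a concrete illustration of the add-recurrence case (the unsigned-char
+// loop mentioned in the chrec comment above): for {0,+,1}<i8> with a max
+// backedge-taken count of 99, Start + Step*99 = 99 still fits in i8, so the
+// zero extension to i32 folds to {0,+,1}<i32> and NUW is cached on the
+// original recurrence instead of wrapping it in a SCEVZeroExtendExpr.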
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return 0;
+}
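+
+// A worked instance (hypothetical numbers): for an i8 step whose signed range
+// is [1, 3], this returns Pred = ICMP_SLT and the constant -128 - 3, which
+// wraps to 125 = 127 - 3 + 1 in i8. If the recurrence is always SLT 125
+// before the increment, adding at most 3 cannot step past the signed maximum.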
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign-extended AddRec as
+// such: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}. As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+ return 0;
+
+ // This is a postinc AR. Check for overflow on the preinc recurrence using the
+  // same three conditions that getSignExtendExpr checks.
+
+ // 1. NSW flags on the step increment.
+ const SCEV *PreStart = SA->getOperand(1);
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+ SE->getSignExtendExpr(Step, WideTy));
+ if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+ // Cache knowledge of PreAR NSW.
+ if (PreAR)
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+ // FIXME: this optimization needs a unit test
+ DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+ const Type *Ty,
+ ScalarEvolution *SE) {
+ const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+ if (!PreStart)
+ return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+ return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+ SE->getSignExtendExpr(PreStart, Ty));
+}
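+
+// A hedged illustration (hypothetical i8 values): for AR = {%n + 1,+,1} whose
+// pre-increment sibling {%n,+,1} is already known NSW, getPreStartForSignExtend
+// returns %n, so the start is normalized to (sext i8 1 to Ty) + (sext i8 %n
+// to Ty) rather than sext(%n + 1). This is what keeps the pre- and
+// post-increment forms of the same recurrence congruent after widening.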
+
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // If the input value is provably positive, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, SCEV::FlagNSW);
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+        // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
+ Add = getAddExpr(Start, UMul);
+ OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+
+      // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
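+
+// As with the zero-extend case, a short illustration: for the signed-char
+// loop mentioned above, sign-extending {0,+,1}<i8> to i32 with a max
+// backedge-taken count of 99 proves Start + Step*99 = 99 cannot overflow i8,
+// so the result is the addrec {0,+,1}<i32> with NSW cached on the original
+// recurrence, rather than a SCEVSignExtendExpr wrapping it.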
+
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Sign-extend negative constants.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ if (SC->getValue()->getValue().isNegative())
+ return getSignExtendExpr(Op, Ty);
+
+ // Peel off a truncate cast.
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+ const SCEV *NewOp = T->getOperand();
+ if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+ return getAnyExtendExpr(NewOp, Ty);
+ return getTruncateOrNoop(NewOp, Ty);
+ }
+
+ // Next try a zext cast. If the cast is folded, use it.
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
+ if (!isa<SCEVZeroExtendExpr>(ZExt))
+ return ZExt;
+
+ // Next try a sext cast. If the cast is folded, use it.
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
+ if (!isa<SCEVSignExtendExpr>(SExt))
+ return SExt;
+
+ // Force the cast to be folded into the operands of an addrec.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Ops;
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ Ops.push_back(getAnyExtendExpr(*I, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
+ }
+
+ // As a special case, fold anyext(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
+ // If the expression is obviously signed, use the sext cast value.
+ if (isa<SCEVSMaxExpr>(Op))
+ return SExt;
+
+ // Absent any other information, use the zext cast value.
+ return ZExt;
+}
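+
+// Two small illustrations (hypothetical operands): extending the i8 constant
+// -1 to i32 takes the sign-extend path and yields i32 -1, while extending
+// (trunc i32 X to i8) back to i32 peels the truncate and simply returns X.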
+
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, and update the
+/// given map. This is a helper function for getAddExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVector<const SCEV *, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SCEV *const *Ops, size_t NumOperands,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Add->op_begin(), Add->getNumOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, NewScale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], Scale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
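+
+// A small worked instance (hypothetical operands): for the operand list
+// (5, %x, (2 * %x)) at scale 1, the constant 5 goes to AccumulatedConstant,
+// %x is inserted into the map with scale 1, and (2 * %x) bumps that entry to
+// (%x, 3) and marks the result interesting; getAddExpr can then rebuild the
+// whole sum as 5 + (3 * %x).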
+
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (LHSC->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Okay, check to see if the same value occurs in the operand list more than
+  // once. If so, merge them together into a multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
+ const Type *Ty = Ops[0]->getType();
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
+ return Mul;
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
+ }
+ if (FoundMatch)
+ return getAddExpr(Ops, Flags);
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+  // folded. e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ const Type *DstType = Trunc->getType();
+ const Type *SrcType = Trunc->getOperand()->getType();
+ SmallVector<const SCEV *, 8> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ SmallVector<const SCEV *, 8> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C =
+ dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops.data(), Ops.size(),
+ APInt(BitWidth, 1), *this)) {
+      // Some interesting folding opportunity is present, so it's worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+ I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first),
+ getAddExpr(I->second)));
+ if (Ops.empty())
+ return getConstant(Ty, 0);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp]) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul = getMulExpr(MulOps);
+ }
+ const SCEV *One = getConstant(Ty, 1);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+      // Otherwise, add the folded AddRec to the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ // Step size has changed, so we cannot guarantee no self-wraparound.
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ return getAddExpr(Ops);
+ }
+
+    // Otherwise couldn't fold anything into this recurrence. Move on to the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVAddExpr *S =
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
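+
+// Two brief illustrations (hypothetical operands): getAddExpr on (X, Y, Y)
+// merges the repeated operand into X + (2 * Y); and adding a value Z that is
+// invariant in loop L to {1,+,2}<L> folds it into the recurrence start,
+// giving {1+Z,+,2}<L>.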
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty mul!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ if (Add->getNumOperands() == 2 &&
+ isa<SCEVConstant>(Add->getOperand(0)))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ LHSC->getValue()->getValue() *
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ } else if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
+ if (Ops.size() == 2) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+ SmallVector<const SCEV *, 4> NewOps;
+ bool AnyFolded = false;
+ for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
+ E = Add->op_end(); I != E; ++I) {
+ const SCEV *Mul = getMulExpr(Ops[0], *I);
+ if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+ NewOps.push_back(Mul);
+ }
+ if (AnyFolded)
+ return getAddExpr(NewOps);
+ }
+ else if (const SCEVAddRecExpr *
+ AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+ E = AddRec->op_end(); I != E; ++I) {
+ Operands.push_back(getMulExpr(Ops[0], *I));
+ }
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
+ }
+ }
+
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+      // If we have a mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
+ //
+      // No-self-wrap cannot be guaranteed after changing the step size, but
+      // it will be inferred if either NUW or NSW is true.
+ Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
+ // {A*C,+,F*D + G*B + B*D}<L>
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ return getMulExpr(Ops);
+ }
+
+    // Otherwise couldn't fold anything into this recurrence. Move on to the
+ // next one.
+ }
+
+  // Okay, it looks like we really DO need a mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
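+
+// A brief illustration (hypothetical operands): getMulExpr on (2, (3 + %x))
+// hits the C1*(C2+V) case and distributes to getAddExpr(2*3, 2*%x),
+// i.e. (6 + (2 * %x)).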
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+      return LHS;                               // X udiv 1 --> X
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+      // Determine if the division can be folded into the operands of
+      // its left-hand operand.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ const Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
+ const IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+ if (!Step->getValue()->getValue()
+ .urem(RHSC->getValue()->getValue()) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop(),
+ SCEV::FlagNW);
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUDivExpr);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+ LHS, RHS);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
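+
+// For illustration: a divisor of 1 returns the dividend unchanged
+// (X udiv 1 --> X), and two constants fold directly, e.g. 6 /u 2 becomes the
+// constant 3; anything that cannot be simplified is uniqued as a
+// SCEVUDivExpr node.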
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> Operands;
+ Operands.push_back(Start);
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L, Flags);
+}
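+
+// A short illustration (hypothetical operands): if Step is itself the addrec
+// {1,+,1}<L> on the same loop, getAddRecExpr(Start, Step, L) flattens the
+// nesting into the single recurrence {Start,+,1,+,1}<L>.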
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
+ "SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
+ }
+
+  // It's tempting to want to call getMaxBackedgeTakenCount here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
+ bool All = true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
+ All = false;
+ break;
+ }
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+ }
+
+  // Canonicalize nested AddRecs by nesting them in order of loop depth.
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop *NestedLoop = NestedAR->getLoop();
+ if (L->contains(NestedLoop) ?
+ (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+ (!NestedLoop->contains(L) &&
+ DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ Operands[0] = NestedAR->getStart();
+ // AddRecs require their operands be loop-invariant with respect to their
+ // loops. Don't perform this transformation if it would break this
+ // requirement.
+ bool AllInvariant = true;
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ if (!isLoopInvariant(Operands[i], L)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Create a recurrence for the outer loop with the same step size.
+ //
+ // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+ // inner recurrence has the same property.
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+ NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
+ AllInvariant = true;
+ for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+ if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
+ AllInvariant = false;
+ break;
+ }
+ if (AllInvariant) {
+ // Ok, both add recurrences are valid after the transformation.
+ //
+ // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+ // the outer recurrence has the same property.
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+ return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+ }
+ }
+ // Reset Operands to its original state.
+ Operands[0] = NestedAR;
+ }
+ }
+
+ // Okay, it looks like we really DO need an addrec expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ ID.AddPointer(Operands[i]);
+ ID.AddPointer(L);
+ void *IP = 0;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+ std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+ S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+ O, Operands.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
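+  // e.g. smax(2, 7, %x) becomes smax(7, %x).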
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::smax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+ // If we have an smax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X smax Y smax Y --> X smax Y
+    // X smax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+  // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(getContext(),
+ APIntOps::umax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
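+    // e.g. umax(0, %x) is just %x, since 0 is the unsigned minimum.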
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+ // If we have an umax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X umax Y umax Y --> X umax Y
+    // X umax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = 0;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
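+  // Since ~z == -1 - z, complementing reverses the signed order, so the smax
+  // of the complements, complemented again, yields the smin of the originals.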
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getTypeAllocSize(AllocTy));
+
+ Constant *C = ConstantExpr::getSizeOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+ Constant *C = ConstantExpr::getAlignOf(AllocTy);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+ unsigned FieldNo) {
+ // If we have TargetData, we can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ if (TD)
+ return getConstant(TD->getIntPtrType(getContext()),
+ TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+ Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+ Constant *FieldNo) {
+ Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
+ const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+ // Don't attempt to do anything other than create a SCEVUnknown object
+ // here. createSCEV only calls getUnknown after checking for all other
+ // interesting possibilities, and any other code that calls getUnknown
+ // is doing so in order to hide a value from SCEV canonicalization.
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(V);
+ void *IP = 0;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This includes integer types and pointer types;
+/// pointer types are analyzable even without target-specific information,
+/// in which case a conservative pointer width is assumed.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+ // Integers and pointers are always SCEVable.
+ return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ // If we have a TargetData, use it!
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+
+ // Integer types have fixed sizes.
+ if (Ty->isIntegerTy())
+ return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is pointer. Without TargetData, conservatively
+ // assume pointers are 64-bit.
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+ return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isIntegerTy())
+ return Ty;
+
+  // The only other supported type is pointer.
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+ if (TD) return TD->getIntPtrType(getContext());
+
+ // Without TargetData, conservatively assume pointers are 64-bit.
+ return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+ if (I != ValueExprMap.end()) return I->second;
+ const SCEV *S = createSCEV(V);
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to have been created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(V,
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags) {
+ assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
+ // X - Y --> X + -Y
+ return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+ const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or sign extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or any extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrAnyExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getAnyExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getUMinFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umin operation
+/// with them.
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+ }
+ else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = 0;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ if ((*I)->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = *I;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+}
+
+/// ForgetSymbolicName - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMap if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(PN, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(PN);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (Old != SymName && !hasOperand(Old, SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+    // structure, it's a PHI that's in the process of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = 0, *StartValueV = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = 0;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = 0;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
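+          // e.g. for %i.next = add %i, 1, BEValue is the symbolic %i plus 1;
+          // stripping the symbolic %i leaves an accumulator of 1, giving the
+          // addrec {start,+,1}.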
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ } else if (const GEPOperator *GEP =
+ dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer.
+ if (GEP->isInBounds())
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ }
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+ Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(BEValue)) {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+        // Because the other in-value of i (0) fits the evolution of BEValue,
+ // i really is an addrec evolution.
+ if (AddRec->getLoop() == L && AddRec->isAffine()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+
+ // If StartVal = j.start - j.stride, we can use StartVal as the
+ // initial step of the addrec evolution.
+ if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+ AddRec->getOperand(1))) {
+ // FIXME: For constant StartVal, we should be able to infer
+ // no-wrap flags.
+ const SCEV *PHISCEV =
+ getAddRecExpr(StartVal, AddRec->getOperand(1), L,
+ SCEV::FlagAnyWrap);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ }
+ }
+ }
+
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (LI->replacementPreservesLCSSAForm(PN, V))
+ return getSCEV(V);
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
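+/// For example, a GEP indexing into an array of i32 scales the sign-extended
+/// index by the element size (4 bytes on typical targets) and adds it to the
+/// base pointer.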
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+
+ // Don't blindly transfer the inbounds flag from the GEP instruction to the
+ // Add expression, because the Instruction may be guarded by control flow
+ // and the no-overflow bits may not be valid for the expression in any
+ // context.
+ bool isInBounds = GEP->isInBounds();
+
+ const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+ return getUnknown(GEP);
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+ E = GEP->op_end();
+ I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+ } else {
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *IndexS = getSCEV(Index);
+ // Getelementptr indices are signed.
+ IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
+ isInBounds ? SCEV::FlagNSW :
+ SCEV::FlagAnyWrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Get the SCEV for the GEP base.
+ const SCEV *BaseS = getSCEV(Base);
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseS, TotalOffset,
+ isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    // The result is the sum of all operands' results.
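+    // e.g. factors divisible by 4 and by 8 give a product divisible by 32
+    // (2 + 3 = 5 trailing zeros), capped at the bit width.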
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+    // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
+ return 0;
+}
+
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+ if (I != UnsignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum unsigned value will have those
+ // known zeros as well.
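+  // e.g. with two known trailing zero bits, the range maximum is the largest
+  // multiple of 4 representable in the bit width.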
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getUnsignedRange(Add->getOperand(i)));
+ return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+ return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+ return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+ return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getUnsignedRange(UDiv->getLHS());
+ ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+ return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(ZExt->getOperand());
+ return setUnsignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getUnsignedRange(SExt->getOperand());
+ return setUnsignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getUnsignedRange(Trunc->getOperand());
+ return setUnsignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no unsigned wrap, the value will never be less than its
+ // initial value.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ if (!C->getValue()->isZero())
+ ConservativeResult =
+ ConservativeResult.intersectWith(
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getUnsignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setUnsignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+ EndRange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+ EndRange.getUnsignedMax());
+ if (Min.isMinValue() && Max.isMaxValue())
+ return setUnsignedRange(AddRec, ConservativeResult);
+ return setUnsignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setUnsignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ if (Ones == ~Zeros + 1)
+ return setUnsignedRange(U, ConservativeResult);
+ return setUnsignedRange(U,
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
+ }
+
+ return setUnsignedRange(S, ConservativeResult);
+}
+
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+ if (I != SignedRanges.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum signed value will have those
+ // known zeros as well.
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0)
+ ConservativeResult =
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getSignedRange(Add->getOperand(0));
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getSignedRange(Add->getOperand(i)));
+ return setSignedRange(Add, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getSignedRange(Mul->getOperand(0));
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getSignedRange(Mul->getOperand(i)));
+ return setSignedRange(Mul, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(SMax->getOperand(0));
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getSignedRange(SMax->getOperand(i)));
+ return setSignedRange(SMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getSignedRange(UMax->getOperand(0));
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getSignedRange(UMax->getOperand(i)));
+ return setSignedRange(UMax, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getSignedRange(UDiv->getLHS());
+ ConstantRange Y = getSignedRange(UDiv->getRHS());
+ return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(ZExt->getOperand());
+ return setSignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getSignedRange(SExt->getOperand());
+ return setSignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getSignedRange(Trunc->getOperand());
+ return setSignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no signed wrap, and all the operands have the same sign or
+ // zero, the value won't ever change sign.
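+    // For instance, an <nsw> addrec whose operands are all known non-negative
+    // is constrained to [0, signed max].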
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+ bool AllNonNeg = true;
+ bool AllNonPos = true;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ }
+ if (AllNonNeg)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt(BitWidth, 0),
+ APInt::getSignedMinValue(BitWidth)));
+ else if (AllNonPos)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1)));
+ }
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ const Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ ConstantRange StartRange = getSignedRange(Start);
+ ConstantRange StepRange = getSignedRange(Step);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange EndRange =
+ StartRange.add(MaxBECountRange.multiply(StepRange));
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+ ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+ ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+ if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+ ExtEndRange)
+ return setSignedRange(AddRec, ConservativeResult);
+
+ APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+ EndRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+ EndRange.getSignedMax());
+ if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+ return setSignedRange(AddRec, ConservativeResult);
+ return setSignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ }
+ }
+
+ return setSignedRange(AddRec, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ if (!U->getValue()->getType()->isIntegerTy() && !TD)
+ return setSignedRange(U, ConservativeResult);
+ unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+ if (NS == 1)
+ return setSignedRange(U, ConservativeResult);
+ return setSignedRange(U, ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ }
+
+ return setSignedRange(S, ConservativeResult);
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return getUnknown(V);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (isa<ConstantPointerNull>(V))
+ return getConstant(V->getType(), 0);
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ switch (Opcode) {
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
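+    // e.g. for (((a + b) + c) + d) this gathers {d, c, b, a} and makes a
+    // single getAddExpr call.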
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // See the Add code above.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
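+    // e.g. for an i32 %x, %x & 255 becomes zext(trunc(%x to i8)) back to i32.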
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+      // Use ComputeMaskedBits to recover the bits that ShrinkDemandedConstant
+      // knew about and reconstruct the low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
+ return
+ getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+ IntegerType::get(getContext(), BitWidth - LZ)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ const Type *UTy = U->getType();
+ const SCEV *Z0 = Z->getOperand();
+ const Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
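+    // e.g. x << 3 becomes x * 8.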
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::LShr:
+    // Turn logical shift right of a constant into an unsigned divide.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
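+    // e.g. on i32, (x << 24) >>s 24 becomes sext(trunc(x to i8)) back to i32.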
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+ // lead to pointer expressions which cannot safely be expanded to GEPs,
+ // because ScalarEvolution doesn't respect the GEP aliasing rules when
+ // simplifying integer expressions.
+
+ case Instruction::GetElementPtr:
+ return createNodeForGEP(cast<GEPOperator>(U));
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // This could be a smax or umax that was lowered earlier.
+ // Try to recover it.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (LHS->getType() == U->getType()) {
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *RS = getSCEV(RHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (LHS->getType() == U->getType() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(LHS->getType(), 1);
+ const SCEV *LS = getSCEV(LHS);
+ const SCEV *LA = getSCEV(U->getOperand(1));
+ const SCEV *RA = getSCEV(U->getOperand(2));
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
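+///
+/// For example, if the header is branched to 10 times in total (once on
+/// entry and 9 times from within the loop), the trip count is 10 and the
+/// backedge-taken count returned here is 9.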
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Max;
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert a CouldNotCompute for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BackedgeTakenInfo Result = getCouldNotCompute();
+ BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
+ if (Computed.Exact != getCouldNotCompute()) {
+ assert(isLoopInvariant(Computed.Exact, L) &&
+ isLoopInvariant(Computed.Max, L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+
+ // Update the value in the map.
+ Result = Computed;
+ } else {
+ if (Computed.Max != getCouldNotCompute())
+ // Update the value in the map.
+ Result = Computed;
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (Computed.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the process of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
+
+ // Re-lookup the insert position, since the call to
+ // ComputeBackedgeTakenCount above could result in a
+ // recursive call to getBackedgeTakenInfo (on a different
+ // loop), which would invalidate the iterator computed
+ // earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
+ BackedgeTakenCounts.erase(L);
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ bool CouldNotComputeBECount = false;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BackedgeTakenInfo NewBTI =
+ ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+ if (NewBTI.Exact == getCouldNotCompute()) {
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldNotComputeBECount = true;
+ BECount = getCouldNotCompute();
+ } else if (!CouldNotComputeBECount) {
+ if (BECount == getCouldNotCompute())
+ BECount = NewBTI.Exact;
+ else
+ BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
+ }
+ if (MaxBECount == getCouldNotCompute())
+ MaxBECount = NewBTI.Max;
+ else if (NewBTI.Max != getCouldNotCompute())
+ MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+}
+
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
+/// of the specified loop will execute if it exits via the specified block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock) {
+
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (ExitBr == 0) return getCouldNotCompute();
+ assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
+ if (ExitBr->getSuccessor(0) != L->getHeader() &&
+ ExitBr->getSuccessor(1) != L->getHeader() &&
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return getCouldNotCompute();
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return getCouldNotCompute();
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return getCouldNotCompute();
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
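+ // (For example, if one condition gives an exact count of 3 backedges and
+ // the other gives 7, the combined exact count is umin(3, 7) = 3.)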
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (BTI0.Exact == getCouldNotCompute() ||
+ BTI1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max == getCouldNotCompute())
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == getCouldNotCompute())
+ MaxBECount = BTI0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ } else {
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getConstant(CI->getType(), 0);
+ }
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (!L->contains(FBB))
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ BackedgeTakenInfo ItCnt =
+ ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
+ }
+
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ default:
+#if 0
+ dbgs() << "ComputeBackedgeTakenCount ";
+ if (ExitCond->getOperand(0)->getType()->isUnsigned())
+ dbgs() << "[unsigned] ";
+ dbgs() << *LHS << " "
+ << Instruction::getOpcodeName(Instruction::ICmp)
+ << " " << *RHS << "\n";
+#endif
+ break;
+ }
+ return
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index) indexing into it, return
+/// the addressed element of the initializer or null if the index expression is
+/// invalid.
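+///
+/// For example, indexing a constant [3 x i32] initializer holding 10, 20, 30
+/// with the single index 1 yields the i32 constant 20.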
+static Constant *
+GetAddressedElementFromGlobal(GlobalVariable *GV,
+ const std::vector<ConstantInt*> &Indices) {
+ Constant *Init = GV->getInitializer();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ uint64_t Idx = Indices[i]->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ assert(Idx < CS->getNumOperands() && "Bad struct index!");
+ Init = cast<Constant>(CS->getOperand(Idx));
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ if (Idx >= CA->getNumOperands()) return 0; // Bogus program
+ Init = cast<Constant>(CA->getOperand(Idx));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ assert(Idx < STy->getNumElements() && "Bad struct index!");
+ Init = Constant::getNullValue(STy->getElementType(Idx));
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+ if (Idx >= ATy->getNumElements()) return 0; // Bogus program
+ Init = Constant::getNullValue(ATy->getElementType());
+ } else {
+ llvm_unreachable("Unknown constant aggregate type!");
+ }
+ return 0;
+ } else {
+ return 0; // Unknown initializer type
+ }
+ }
+ return Init;
+}
+
+/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+ if (LI->isVolatile()) return getCouldNotCompute();
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return getCouldNotCompute();
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return getCouldNotCompute();
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = 0;
+ std::vector<ConstantInt*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(0);
+ }
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is a loop variant variable value now.
+ const SCEV *Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return getCouldNotCompute();
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ if (Result == 0) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+ dbgs() << "\n***\n*** Computed loop count " << *ItCst
+ << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+ << "***\n";
+#endif
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ // If this is not an instruction, or if this is an instruction outside of the
+ // loop, it can't be derived from a loop PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !L->contains(I)) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (L->getHeader() == I->getParent())
+ return PN;
+ else
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return 0;
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, return early.
+ if (!CanConstantFold(I)) return 0;
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = 0;
+ for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+ if (!isa<Constant>(I->getOperand(Op))) {
+ PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI == 0)
+ PHI = P;
+ else if (PHI != P)
+ return 0; // Evolving from multiple different PHIs.
+ }
+
+ // This is an expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+ const TargetData *TD) {
+ if (isa<PHINode>(V)) return PHIVal;
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = cast<Instruction>(V);
+
+ std::vector<Constant*> Operands(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
+ if (Operands[i] == 0) return 0;
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
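+///
+/// For example, a header PHI that starts at 1 and is multiplied by 3 on each
+/// backedge folds to 1 * 3^4 = 81 when the backedge-taken count is 4.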
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+ const APInt &BEs,
+ const Loop *L) {
+ DenseMap<PHINode*, Constant*>::const_iterator I =
+ ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+ if (BEs.ugt(MaxBruteForceIterations))
+ return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0)
+ return RetVal = 0; // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return RetVal = 0; // Not derived from same PHI.
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = PHIVal; // Got exit value!
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == PHIVal)
+ return RetVal = NextPHI; // Stopped evolving!
+ if (NextPHI == 0)
+ return 0; // Couldn't evaluate!
+ PHIVal = NextPHI;
+ }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit
+/// condition gets a value of ExitWhen (true or false). If we cannot
+/// evaluate the trip count of the loop, return getCouldNotCompute().
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (PN == 0) return getCouldNotCompute();
+
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ // One entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return getCouldNotCompute(); // Not derived from same PHI.
+
+ // Okay, we found a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+ unsigned IterationNum = 0;
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ for (Constant *PHIVal = StartCST;
+ IterationNum != MaxIterations; ++IterationNum) {
+ ConstantInt *CondVal =
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return getCouldNotCompute();
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+ }
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ if (NextPHI == 0 || NextPHI == PHIVal)
+ return getCouldNotCompute();// Couldn't evaluate or not making progress...
+ PHIVal = NextPHI;
+ }
+
+ // Too many iterations were needed to evaluate.
+ return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies the loop
+/// nest at which to evaluate the expression: null means the top level,
+/// outside all loops, and a non-null loop means the scope immediately
+/// inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
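+///
+/// For example, an addrec {0,+,2} defined in a loop whose backedge-taken
+/// count is the constant 9 evaluates to 2*9 = 18 at the scope of the
+/// parent loop.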
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // Check to see if we've folded this expression at this loop before.
+ std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+ std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? Pair.first->second : V;
+
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ ValuesAtScopes[V][L] = C;
+ return C;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = (*this->LI)[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN,
+ BTCC->getValue()->getValue(),
+ LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Op = I->getOperand(i);
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ // If any operand is non-constant and not SCEVable (neither integer
+ // nor pointer), don't even try to analyze it with SCEV techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = 0;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
+ C = SC->getValue();
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
+ C = dyn_cast<Constant>(SU->getValue());
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ }
+
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = 0;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Operands[0], Operands[1], TD);
+ else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+ if (!C) return V;
+ return getSCEV(C);
+ }
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+ Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ llvm_unreachable("Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ const SCEV *FoldedRec =
+ getAddRecExpr(NewOps, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ llvm_unreachable("Unknown SCEV type!");
+ return 0;
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
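+///
+/// As a worked illustration of the steps below, take BW = 8, A = 4, B = 12:
+/// D = gcd(A, N) = 2^2 = 4, B is divisible by D, the inverse of A/D = 1
+/// modulo N/D = 64 is 1, so the minimum root is (1 * B/D) mod 64 = 3;
+/// indeed 4 * 3 = 12 (mod 256).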
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+ // The gcd of A and N may have only one prime factor: 2. The number of
+ // trailing zeros in A is its multiplicity
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+ // B is divisible by D if and only if the multiplicity of prime factor 2 for B
+ // is not less than the multiplicity of this prime factor for D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.setBit(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
+///
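+/// The chrec's value at iteration x is L + M*x + N*x*(x-1)/2, i.e. the
+/// polynomial (N/2)*x^2 + (M - N/2)*x + L, which is where the A, B and C
+/// coefficients computed below come from.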
+static std::pair<const SCEV *,const SCEV *>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
+ const APInt &L = LC->getValue()->getValue();
+ const APInt &M = MC->getValue()->getValue();
+ const APInt &N = NC->getValue()->getValue();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA( A << 1 );
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+ } // end APIntOps namespace
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with a "x != y" exit test. The exit condition is
+/// now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0. We know and take advantage of the fact that this
+/// expression is only used in a comparison-against-zero context.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1 && R2) {
+#if 0
+ dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
+ << " sol#2: " << *R2 << "\n";
+#endif
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ return getCouldNotCompute();
+ }
+
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
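+ //
+ // For example, the affine addrec {10,+,-2} reaches zero after N = 5
+ // backedge-taken iterations, and {-8,+,2} after N = 4.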
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // For now we handle only constant steps.
+ //
+ // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+ // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+ // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+ // We have not yet seen any such cases.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (StepC == 0)
+ return getCouldNotCompute();
+
+ // For positive steps (counting up until unsigned overflow):
+ // N = -Start/Step (as unsigned)
+ // For negative steps (counting down to zero):
+ // N = Start/-Step
+ // First compute the unsigned distance from zero in the direction of Step.
+ bool CountDown = StepC->getValue()->getValue().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+ // Handle unitary steps, which cannot wrap around.
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
+ return Distance;
+
+ // If the recurrence is known not to wraparound, unsigned divide computes the
+ // back edge count. We know that the value will either become zero (and thus
+ // the loop terminates), that the loop will terminate through some other exit
+ // condition first, or that the loop has undefined behavior. This means
+ // we can't "miss" the exit value, even with nonunit stride.
+ //
+ // FIXME: Prove that loops always exhibit *acceptable* undefined
+ // behavior. Loops must exhibit defined behavior until a wrapped value is
+ // actually used. So the trip count computed by udiv could be smaller than the
+ // number of well-defined iterations.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+ // FIXME: We really want an "isexact" bit for udiv.
+ return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
+ return getCouldNotCompute();
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// CouldNotCompute
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getConstant(C->getType(), 0);
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, together with that successor, or
+/// a pair of null pointers if no such block is found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return std::make_pair(Pred, BB);
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+ return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal, however for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
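+/// For example, a constant operand is moved to the right-hand side, and a
+/// comparison such as "X <=u 5" is rewritten as "X <u 6".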
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS) {
+ bool Changed = false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value which is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also make a dominance check,
+ // as both operands could be addrecs loop-invariant in each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getValue()->getValue();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
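+  // For example, "x s<= y" becomes "x s< y+1" when y is known not to be
+  // SINT_MAX, or "x-1 s< y" when x is known not to be SINT_MIN.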
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, AR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+ return true;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+ if (isLoopEntryGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getStart()) &&
+ isLoopBackedgeGuardedByCond(
+ AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ break;
+ case ICmpInst::ICMP_SGT:
+ Pred = ICmpInst::ICMP_SLT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ Pred = ICmpInst::ICMP_SLE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ Pred = ICmpInst::ICMP_ULE;
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LoopContinuePredicate ||
+ LoopContinuePredicate->isUnconditional())
+ return false;
+
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+}
+
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
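+/// For example, a loop of the form
+/// "if (n > 0) { for (i = 0; i < n; ++i) ... }" is entry-guarded by "n > 0",
+/// letting the trip count computation use n directly instead of smax(n, 0).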
+bool
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(L->getLoopPredecessor(), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ return true;
+ }
+
+ return false;
+}
+
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
+ bool Inverse) {
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ if (!ICI) return false;
+
+ // Bail if the ICmp's operands' types are wider than the needed type
+ // before attempting to call getSCEV on them. This avoids infinite
+ // recursion, since the analysis of widening casts can require loop
+ // exit condition information for overflow checking, which would
+ // lead back here.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(ICI->getOperand(0)->getType()))
+ return false;
+
+ // Now that we found a conditional branch that dominates the loop, check to
+ // see if it is the comparison we are looking for.
+ ICmpInst::Predicate FoundPred;
+ if (Inverse)
+ FoundPred = ICI->getInversePredicate();
+ else
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ // Balance the types. The case where FoundLHS' type is wider than
+ // LHS' type is checked for above.
+ if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
+
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(Pred);
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ }
+
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
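+///
+/// For example, a known "FoundLHS u< FoundRHS" implies "LHS u< RHS" whenever
+/// LHS u<= FoundLHS and RHS u>= FoundRHS are known from the constant ranges.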
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
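+///
+/// For example, with Start = 0, End = 10, and Step = 3, the result is
+/// (10 - 0 + (3 - 1)) /u 3 = 4, matching the four values 0, 3, 6, and 9
+/// for which {0,+,3} is still below the end value.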
+const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
+ const SCEV *End,
+ const SCEV *Step,
+ bool NoWrap) {
+ assert(!isKnownNegative(Step) &&
+ "This code doesn't handle negative strides yet!");
+
+ const Type *Ty = Start->getType();
+
+ // When Start == End, we have an exact BECount == 0. Short-circuit this case
+ // here because SCEV may not be able to determine that the unsigned division
+ // after rounding is zero.
+ if (Start == End)
+ return getConstant(Ty, 0);
+
+ const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
+ const SCEV *Diff = getMinusSCEV(End, Start);
+ const SCEV *RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ const SCEV *Add = getAddExpr(Diff, RoundUp);
+
+ if (!NoWrap) {
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getContext(),
+ getTypeSizeInBits(Ty) + 1);
+ const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
+ const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
+ const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return getCouldNotCompute();
+ }
+
+ return getUDivExpr(Add, Step);
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// CouldNotCompute.
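+///
+/// For example, the number of iterations for which {0,+,2} u< 7 holds is
+/// (7 - 0 + (2 - 1)) /u 2 = 4, corresponding to the values 0, 2, 4, and 6.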
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool isSigned) {
+ // Only handle: "ADDREC < LoopInvariant".
+ if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // Check to see if we have a flag which makes analysis easy.
+ bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
+ AddRec->getNoWrapFlags(SCEV::FlagNUW);
+
+ if (AddRec->isAffine()) {
+ unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+ if (Step->isZero())
+ return getCouldNotCompute();
+ if (Step->isOne()) {
+ // With unit stride, the iteration never steps past the limit value.
+ } else if (isKnownPositive(Step)) {
+ // Test whether a positive iteration can step past the limit
+ // value and past the maximum value for its type in a single step.
+ // Note that it's not sufficient to check NoWrap here, because even
+ // though the value after a wrap is undefined, it's not undefined
+ // behavior, so if wrap does occur, the loop could either terminate or
+ // loop infinitely, but in either case, the loop is guaranteed to
+ // iterate at least until the iteration where the wrapping occurs.
+ const SCEV *One = getConstant(Step->getType(), 1);
+ if (isSigned) {
+ APInt Max = APInt::getSignedMaxValue(BitWidth);
+ if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+ .slt(getSignedRange(RHS).getSignedMax()))
+ return getCouldNotCompute();
+ } else {
+ APInt Max = APInt::getMaxValue(BitWidth);
+ if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+ .ult(getUnsignedRange(RHS).getUnsignedMax()))
+ return getCouldNotCompute();
+ }
+ } else
+ // TODO: Handle negative strides here and below.
+ return getCouldNotCompute();
+
+ // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+ // m. So, we count the number of iterations in which {n,+,s} < m is true.
+ // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+ // treat m-n as signed nor unsigned due to overflow possibility.
+
+ // First, we get the value of the LHS in the first iteration: n
+ const SCEV *Start = AddRec->getOperand(0);
+
+ // Determine the minimum constant start value.
+ const SCEV *MinStart = getConstant(isSigned ?
+ getSignedRange(Start).getSignedMin() :
+ getUnsignedRange(Start).getUnsignedMin());
+
+ // If we know that the condition is true in order to enter the loop,
+ // then we know that it will run exactly (m-n)/s times. Otherwise, we
+ // only know that it will execute (max(m,n)-n)/s times. In both cases,
+ // the division must round up.
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
+ End = isSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ // Determine the maximum constant end value.
+ const SCEV *MaxEnd = getConstant(isSigned ?
+ getSignedRange(End).getSignedMax() :
+ getUnsignedRange(End).getUnsignedMax());
+
+ // If MaxEnd is within a step of the maximum integer value in its type,
+ // adjust it down to the minimum value which would produce the same effect.
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
+ // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step,
+ getConstant(Step->getType(), 1));
+ MaxEnd = isSigned ?
+ getSMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
+ StepMinusOne)) :
+ getUMinExpr(MaxEnd,
+ getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
+ StepMinusOne));
+
+ // Finally, we subtract these two values and divide, rounding up, to get
+ // the number of times the backedge is executed.
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ // If we already have an exact constant BECount, use it instead.
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
+ : getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+    // If the stride is nonconstant and NoWrap == true, then
+    // getBECount(MinStart, MaxEnd) may not be computable. That would leave
+    // an exact BECount with an invalid MaxBECount, so fall back to the exact
+    // count rather than lose optimization opportunities.
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not in
+/// the range, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+ getNoWrapFlags(FlagNW));
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(getType(), 0);
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
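+    // For example, for {0,+,3} and the range [0, 10): A = 3, End = 9, and
+    // ExitVal = (9 + 3) /u 3 = 4; iteration 4 produces 12, which is outside
+    // the range, while iteration 3 produces 9, which is still inside it.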
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+ // getNoWrapFlags(FlagNW)
+ FlagAnyWrap);
+
+ // Next, solve the constructed addrec
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+ if (CB->getZExtValue() == false)
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ Value *Old = getValPtr();
+ SmallVector<User *, 16> Worklist;
+ SmallPtrSet<User *, 8> Visited;
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old)
+ continue;
+ if (!Visited.insert(U))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(U);
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ }
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+ : FunctionPass(ID), FirstUnknown(0) {
+ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+ this->F = &F;
+ LI = &getAnalysis<LoopInfo>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+ return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
+ BackedgeTakenCounts.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ UniqueSCEVs.clear();
+ SCEVAllocator.Reset();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<DominatorTree>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n"
+ "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
+
+ if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable max backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+ // ScalarEvolution's implementation of the print method is to print
+ // out SCEV values of all instructions that are interesting. Doing
+ // this potentially causes it to create new SCEV objects though,
+ // which technically conflicts with the const qualifier. This isn't
+ // observable from outside the class though, so casting away the
+ // const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ OS << "Classifying expressions for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
+ OS << *I << '\n';
+ OS << " --> ";
+ const SCEV *SV = SE.getSCEV(&*I);
+ SV->print(OS);
+
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+ std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, LoopVariant));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ LoopDisposition D = computeLoopDisposition(S, L);
+ return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ if (!isLoopInvariant(*I, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool HasVarying = false;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ LoopDisposition D = getLoopDisposition(*I, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return LoopVariant;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return LoopVariant;
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+ std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+ Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ BlockDisposition D = getBlockDisposition(*I, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT->properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return DoesNotDominateBlock;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return DoesNotDominateBlock;
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return false;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
+ const SCEV *CastOp = Cast->getOperand();
+ return Op == CastOp || hasOperand(CastOp, Op);
+ }
+ case scAddRecExpr:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ const SCEV *NAryOp = *I;
+ if (NAryOp == Op || hasOperand(NAryOp, Op))
+ return true;
+ }
+ return false;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ return LHS == Op || hasOperand(LHS, Op) ||
+ RHS == Op || hasOperand(RHS, Op);
+ }
+ case scUnknown:
+ return false;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return false;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 000000000000..e9edb3e083de
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,173 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis in terms of ScalarEvolution queries.
+//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependencies between different iterations.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses ScalarEvolution to answer queries.
+ class ScalarEvolutionAliasAnalysis : public FunctionPass,
+ public AliasAnalysis {
+ ScalarEvolution *SE;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ initializeScalarEvolutionAliasAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+ Value *GetBaseValue(const SCEV *S);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+  // No identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (LocA.Size == 0 || LocB.Size == 0)
+ return NoAlias;
+
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+    // Test whether the difference is known to be great enough that memory
+    // regions of the given sizes don't overlap. This assumes that ASizeInt
+    // and BSizeInt are non-zero, which is special-cased above.
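+    // For example, if BA = B - A is known to lie in [16, 100] and both
+    // accesses are 8 bytes wide, then [A, A+8) and [B, B+8) cannot overlap.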
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+    // Test whether the difference is known to be great enough that memory
+    // regions of the given sizes don't overlap. This assumes that ASizeInt
+    // and BSizeInt are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(Location(AO ? AO : LocA.Ptr,
+ AO ? +UnknownSize : LocA.Size,
+ AO ? 0 : LocA.TBAATag),
+ Location(BO ? BO : LocB.Ptr,
+ BO ? +UnknownSize : LocB.Size,
+ BO ? 0 : LocB.TBAATag)) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 000000000000..befe6d2599d6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1403 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, fix it.
+ if (BasicBlock::iterator(CI) != IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ rememberInstruction(NewCI);
+ return NewCI;
+ }
+ rememberInstruction(CI);
+ return CI;
+ }
+ }
+
+ // Create a new cast.
+ Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ rememberInstruction(I);
+ return I;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast && V->getType() == Ty)
+ return V;
+
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = I; ++IP;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
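+/// For example, if an identical "add %a, %b" already appears among the few
+/// instructions immediately before the insertion point, it is reused rather
+/// than emitting a second copy.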
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // If we haven't found this binop, insert it.
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS,
+                                                          "tmp"));
+ BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
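+///
+/// For example, with Factor = 4, a constant S = 12 becomes 3 with no
+/// remainder, while S = 14 becomes 3 and adds a remainder of 2.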
+static bool FactorOutConstant(const SCEV *&S,
+ const SCEV *&Remainder,
+ const SCEV *Factor,
+ ScalarEvolution &SE,
+ const TargetData *TD) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(),
+ C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(
+ FC->getValue()->getValue())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ if (TD) {
+ // With TargetData, the size is known. Check if there is a constant
+ // operand which is a multiple of the given factor. If so, we can
+ // factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(
+ FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ } else {
+ // Without TargetData, check if Factor can be factored out of any of the
+ // Mul's operands. If so, we can just remove it.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *SOp = M->getOperand(i);
+ const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
+ if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+ Remainder->isZero()) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[i] = SOp;
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+ return false;
+ // FIXME: can use A->getNoWrapFlags(FlagNW)
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
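+/// As a rough illustration, given a base pointer of type %struct.S* where
+/// %struct.S = type { i32, i32 } (an 8-byte element with field 1 at offset 4)
+/// and an offset expression of the form 8*i + 4, the expansion can produce a
+/// GEP along the lines of
+///   getelementptr %struct.S* %base, i64 %i, i32 1
+/// instead of casting the pointer to an integer, adding, and casting back.
+///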
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ const PointerType *PTy,
+ const Type *Ty,
+ Value *V) {
+ const Type *ElTy = PTy->getElementType();
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEV *Op = Ops[i];
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ if (SE.TD) {
+ // With TargetData, field offsets are known. See if a constant offset
+ // falls within any of the struct fields.
+ if (Ops.empty()) break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *SE.TD->getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ } else {
+ // Without TargetData, just check for an offsetof expression of the
+ // appropriate struct type.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
+ GepIndices.push_back(FieldNo);
+ ElTy =
+ STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
+ Ops[i] = SE.getConstant(Ty, 0);
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ break;
+ }
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return GEP;
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(Casted,
+ GepIndices.begin(),
+ GepIndices.end(),
+ "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later one.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+ RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result =
+ PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+ getRelevantLoop(D->getRHS()),
+ *SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+ return 0;
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the front, so they can be used as GEP bases.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (isNonConstantNegative(LHS.second)) {
+ if (!isNonConstantNegative(RHS.second))
+ return false;
+ } else if (isNonConstantNegative(RHS.second))
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is a SCEVUnknown of a non-instruction value, peek
+ // through it to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getValue()->getValue();
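+ // Since x /u 2^k == x >>u k, a udiv by a power-of-two constant can be
+ // emitted as a logical shift right (e.g. a udiv by 8 becomes an lshr by 3).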
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()));
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
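+/// For example (taking an initial Rest of 0 for illustration), a Base of
+/// {(4 + %p),+,8}<L> is transformed into Base = %p with the constant and the
+/// recurrence folded into Rest, giving Rest = 4 + {0,+,8}<L>.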
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ // FIXME: A->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ const Type *ExpandTy,
+ const Type *IntTy) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(Normalized->getType())) &&
+ SE.getSCEV(PN) == Normalized)
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ // Determine if this is a well-behaved chain of instructions leading
+ // back to the PHI. It probably will be, if we're scanning an inner
+ // loop already visited by LSR for example, but it wouldn't have
+ // to be.
+ do {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
+ IncV = 0;
+ break;
+ }
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ }
+ if (!IncV)
+ break;
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ break;
+ if (IncV->mayHaveSideEffects()) {
+ IncV = 0;
+ break;
+ }
+ } while (IncV != PN);
+
+ if (IncV) {
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+ rememberInstruction(IncV);
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
+ return PN;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Expand code for the start value.
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getHeader()->begin());
+
+ // StartV must be hoisted into L's preheader to dominate the new phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Insert instructions right before the
+ // terminator corresponding to the back-edge. Do this before creating the PHI
+ // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+ // negative, insert a sub instead of an add for the increment (unless it's a
+ // constant, because subtracts of constants are canonicalized to adds).
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ bool isPointer = ExpandTy->isPointerTy();
+ bool isNegative = !isPointer && isNonConstantNegative(Step);
+ if (isNegative)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI. If IVIncInsertLoop is
+ // non-null and equal to the addrec's loop, insert the instructions
+ // at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (isPointer) {
+ const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = isNegative ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ const Type *STy = S->getType();
+ const Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized =
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+ Loops, SE, *SE.DT));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = 0;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ // FIXME: Normalized->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = 0;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+ Normalized->getLoop(),
+ // FIXME: Normalized
+ // ->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ const SCEV *const OffsetArray[1] = { PostLoopOffset };
+ Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = 0;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is more narrow.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ // FIXME: S->getNoWrapFlags(FlagNW)
+ SCEV::FlagAnyWrap));
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BasicBlock::iterator NewInsertPt =
+ llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+ ++NewInsertPt;
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+ NewInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ // FIXME: can use S->getNoWrapFlags()
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ Header->begin());
+ rememberInstruction(CanonicalIV);
+
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty, "tmp");
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ const Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+ Instruction *I) {
+ BasicBlock::iterator IP = I;
+ while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ Builder.SetInsertPoint(IP->getParent(), IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ L = L->getParentLoop())
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = L->getHeader()->getFirstNonPHI();
+ while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+ InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ break;
+ }
+
+ // Check to see if we already expanded this here.
+ std::map<std::pair<const SCEV *, Instruction *>,
+ AssertingVH<Value> >::iterator I =
+ InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = visit(S);
+
+ // Remember the expanded value for this SCEV at this location.
+ if (PostIncLoops.empty())
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+
+ // If we just claimed an existing instruction and that instruction had
+ // been the insert point, adjust the insert point forward so that
+ // subsequently inserted code will be dominated.
+ if (Builder.GetInsertPoint() == I) {
+ BasicBlock::iterator It = cast<Instruction>(I);
+ do { ++It; } while (isInsertedInstruction(It) ||
+ isa<DbgInfoIntrinsic>(It));
+ Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+ }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+ // If we acquired more instructions since the old insert point was saved,
+ // advance past them.
+ while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
+
+ Builder.SetInsertPoint(BB, I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ const Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ return V;
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 000000000000..60e630aaab88
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,184 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
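+///
+/// For example, a compare against the IV in a loop exit block dominated by
+/// the latch can use the post-incremented value directly, rather than keeping
+/// the pre-incremented value live out of the loop.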
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
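+/// TransformForPostIncUse - Transform the given expression according to the
+/// given Kind (see ScalarEvolutionNormalization.h). For example, normalizing
+/// a post-increment use of {0,+,1}<L> yields {-1,+,1}<L>; Denormalize undoes
+/// this by re-applying the step via getPostIncExpr.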
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+ Operands.push_back(N);
+ }
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+ Loops, SE, DT);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ // If any operand actually changed, return a transformed result.
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+ Loops, SE, DT);
+ const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+ Loops, SE, DT);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+
+ llvm_unreachable("Unexpected SCEV kind!");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 000000000000..d8c207b4bd49
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+/// work list if it is not already executable...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way
+ Succs[C->isNullValue()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions' successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+ // Update the PHI with the computed value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic; they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (BBExecutable.count(U->getParent())) // Inst is executable?
+ visitInst(*U);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
+
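+// A minimal driver sketch (illustrative only; "MyLattice" is a hypothetical
+// lattice-function subclass, and the constructor shown is an assumption about
+// the header's interface):
+//   MyLattice LF;
+//   SparseSolver Solver(&LF);
+//   Solver.Solve(F);          // run the worklists above to a fixed point
+//   Solver.Print(F, dbgs());  // dump the lattice state for F
+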
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB->hasName())
+ OS << BB->getNameStr() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ LatticeFunc->PrintValue(getLatticeState(I), OS);
+ OS << *I << "\n";
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
new file mode 100644
index 000000000000..68a39cd581f4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -0,0 +1,51 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ WriteAsOperand(O, *i, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 000000000000..0faf1398ec76
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,300 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which, if equal to 1,
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
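+// As an illustrative sketch using the nodes above (%p, %q and the loads are
+// hypothetical), accesses are tagged by attaching the metadata to the
+// instruction:
+//   %i = load i32* %p, !tbaa !1
+//   %f = load float* %q, !tbaa !2
+// Since "int" and "float" are siblings under the same root, the two loads are
+// reported as NoAlias.
+//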
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TBAA metadata to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
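+// Worked example (illustrative), using the nodes from the file header: for
+// A = !1 ("int") and B = !3 ("const float"), neither climb reaches the other
+// node, but both climbs stop at the shared root !0, so Aliases returns false
+// and the query below reports NoAlias.
+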
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ const MDNode *M = Loc.TBAATag;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ if (TBAANode(M).TypeIsImmutable())
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (TBAANode(M).TypeIsImmutable())
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 000000000000..455c91077dfb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,1798 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+ return TD ? TD->getPointerSizeInBits() : 0;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// optimize based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
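+///
+/// As an illustrative sketch (the value and names here are hypothetical): for
+/// a value V defined as "and i8 %x, 15", a caller might do
+///   APInt KnownZero(8, 0), KnownOne(8, 0);
+///   ComputeMaskedBits(V, APInt::getAllOnesValue(8), KnownZero, KnownOne, TD);
+/// and find the top four bits set in KnownZero, since the constant mask forces
+/// them to zero regardless of %x.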
+void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = Mask.getBitWidth();
+ assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+ && "Not integer or pointer type!");
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = Mask;
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+ TD, Depth);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ }
+ return;
+ }
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+ const Type *ObjectType = GV->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GV->isDeclaration() && !GV->mayBeOverridden())
+ Align = TD->getPrefTypeAlignment(ObjectType);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ } else {
+ ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ }
+ return;
+ }
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // Get alignment information off byval arguments if specified in the IR.
+ if (A->hasByValAttr())
+ if (unsigned Align = A->getParamAlignment())
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ return;
+ }
+
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+ if (Depth == MaxDepth || Mask == 0)
+ return; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return;
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::And: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownZero);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ }
+ case Instruction::Or: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownOne);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ }
+ case Instruction::Xor: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case Instruction::Mul: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(I->getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case Instruction::Select:
+ ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ // We can't handle these if we don't know the pointer size.
+ if (!TD) return;
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ if (SrcTy->isPointerTy())
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
+ else
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::BitCast: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ // TODO: For now, not handling conversions like:
+ // (bitcast i64 %x to <2 x i32>)
+ !I->getType()->isVectorTy()) {
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+ return;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt MaskIn = Mask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::Shl:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt Mask2(Mask.lshr(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+ return;
+ }
+ break;
+ case Instruction::LShr:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ // high bits known zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ return;
+ }
+ break;
+ case Instruction::AShr:
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
+ KnownZero |= HighBits;
+ else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
+ KnownOne |= HighBits;
+ return;
+ }
+ break;
+ case Instruction::Sub: {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case Instruction::Add: {
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (I->getOpcode() == Instruction::Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
+ OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoSignedWrap()) {
+ if (I->getOpcode() == Instruction::Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+
+ return;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (Mask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
+ TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ break;
+ }
+
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD)
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
+ APInt LocalMask = APInt::getAllOnesValue(BitWidth);
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+ if (!TD) return;
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min(TrailZ,
+ CountTrailingZeros_64(Offset));
+ } else {
+ // Handle array index arithmetic.
+ const Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) return;
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+ uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ LocalMask = APInt::getAllOnesValue(GEPOpiBits);
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ ComputeMaskedBits(Index, LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ TrailZ = std::min(TrailZ,
+ unsigned(CountTrailingZeros_64(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ Operator *LU = dyn_cast<Operator>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = LU->getOpcode();
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ Mask2 = APInt::getLowBitsSet(BitWidth,
+ KnownZero2.countTrailingOnes());
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+
+ KnownZero = Mask &
+ APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ return;
+
+ // Otherwise take the unions of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ // Skip if every incoming value refers back to this PHI.
+ if (P->hasConstantValue() == P)
+ break;
+
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ // Skip direct self references.
+ if (P->getIncomingValue(i) == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
+ KnownZero2, KnownOne2, TD, MaxDepth-1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ break;
+ }
+ }
+ break;
+ }
+}
+
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+ Depth);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
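+///
+/// For example (illustrative), a value defined as "shl i32 1, %n" is
+/// recognized as a power of two, as is a select whose two arms are each known
+/// powers of two.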
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+ // An exact divide or right shift can only shift off zero bits, so the result
+ // is a power of two only if the first operand is a power of two and not
+ // copying a sign bit (sdiv int_min, 2).
+ if (match(V, m_LShr(m_Value(), m_Value())) ||
+ match(V, m_UDiv(m_Value(), m_Value()))) {
+ PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
+ if (PEO->isExact())
+ return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+ }
+
+ return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
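+///
+/// For example (illustrative), a value defined as "or i32 %a, 1" is known to
+/// be non-zero, because one operand of the 'or' is a non-zero constant.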
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = 0, *Y = 0;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ // shl nuw can't remove any non-zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->hasNoUnsignedWrap())
+ return isKnownNonZero(X, TD, Depth);
+
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ // shr exact can only shift out zero bits.
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ if (XKnownNegative)
+ return true;
+ }
+ // div exact can only produce a zero if the dividend is zero.
+ else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
+ BinaryOperator *BO = cast<BinaryOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, TD, Depth);
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ return true;
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ return true;
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+ TD, Depth);
+ return KnownOne != 0;
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
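+///
+/// For example (illustrative), for a value V defined as "and i8 %x, 12",
+/// MaskedValueIsZero(V, APInt(8, 3), TD) returns true, because the low two
+/// bits of V are known to be zero.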
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ const TargetData *TD, unsigned Depth) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'V' must have an integer, integer-vector, or pointer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+ unsigned Depth) {
+ assert((TD || V->getType()->isIntOrIntVectorTy()) &&
+ "ComputeNumSignBits requires a TargetData object to operate "
+ "on non-integer values!");
+ const Type *Ty = V->getType();
+ unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
+ Ty->getScalarSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+ case Instruction::AShr:
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ // ashr X, C -> adds C sign bits.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ // vector ashr X, <C, C, C, C> -> adds C sign bits
+ if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ Tmp += CI->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ }
+ return Tmp;
+ case Instruction::Shl:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (C->getZExtValue() >= TyBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ // Don't analyze large in-degree PHIs.
+ if (PN->getNumIncomingValues() > 4) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(Tmp,
+ ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ }
+ return Tmp;
+ }
+
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer Multiple such that
+/// V == Base * Multiple. If successful, it returns true and stores the result
+/// in Multiple; otherwise it returns false. It looks through SExt instructions
+/// only if LookThroughSExt is true.
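+///
+/// For example (illustrative), for a value V defined as "shl i32 %x, 3" and
+/// Base == 8, this returns true with Multiple set to %x, since V == 8 * %x.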
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
+
+ const Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt:
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ case Instruction::ZExt:
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
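+      // (e.g. Op0 << 3 becomes Op0 * 8: API gets only bit 3 set, i.e. the
+      // constant 8).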
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
+ }
+
+ Value *Mul0 = NULL;
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ if (Depth == 6)
+    return false;  // Limit search depth; conservatively assume it may be -0.0.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (I == 0) return false;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::FAdd &&
+ isa<ConstantFP>(I->getOperand(1)) &&
+ cast<ConstantFP>(I->getOperand(1))->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+ }
+ }
+
+ return false;
+}
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+  // Handle 'null' ConstantAggregateZero etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
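+      // For example, i32 0xF0F0F0F0 halves into 0xF0F0/0xF0F0 and then into
+      // 0xF0/0xF0, so its splat byte is 0xF0, while i16 0x1234 is rejected at
+      // the first split (0x12 != 0x34).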
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2 = Val2.trunc(Val.getBitWidth()/2);
+ Val = Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(V->getContext(), Val);
+ }
+ }
+
+ // A ConstantArray is splatable if all its members are equal and also
+ // splatable.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ if (CA->getNumOperands() == 0)
+ return 0;
+
+ Value *Val = isBytewiseValue(CA->getOperand(0));
+ if (!Val)
+ return 0;
+
+ for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+ if (CA->getOperand(I-1) != CA->getOperand(I))
+ return 0;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+ // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+  // Base case, the type indexed by Idxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs);
+
+ if (!V)
+ return NULL;
+
+  // Insert the value into the new (sub) aggregate
+ return llvm::InsertValueInst::Create(To, V,
+ ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_range);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion)
+ if (idx_range.empty())
+ return V;
+ // We have indices, so V should have an indexable type
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+ && "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
+ && "Invalid indices for type?");
+ const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+ idx_range));
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+ idx_range));
+ else if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+ // Recursively process this constant
+ return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+ InsertBefore);
+ } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_range.begin();
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_range.end()) {
+ if (InsertBefore)
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V,
+ ArrayRef<unsigned>(idx_range.begin(),
+ req_idx),
+ InsertBefore);
+ else
+ // We can't handle this without inserting insertvalues
+ return 0;
+ }
+
+      // This insertvalue inserts something other than what we are looking for.
+      // See if the (aggregate) value it was inserted into has the value we
+      // are looking for instead.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_range,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(),
+ ArrayRef<unsigned>(req_idx, idx_range.end()),
+ InsertBefore);
+ } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + idx_range.size();
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ Idxs.append(I->idx_begin(), I->idx_end());
+
+ // Add requested indices
+ Idxs.append(idx_range.begin(), idx_range.end());
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
+ }
+ // Otherwise, we don't know (such as, extracting from a function return value
+ // or load instruction)
+ return 0;
+}
+
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+    // Handle struct and array indices, which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
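+  // (For example, with 32-bit pointers an accumulated Offset of 0x80000000 is
+  // folded back to the negative value INT32_MIN by the shift pair below.)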
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// GetConstantStringInfo - This function extracts the null-terminated C string
+/// pointed to by V, starting at the given Offset. If successful, it returns
+/// true and returns the string in Str. If unsuccessful, it returns false.
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+ uint64_t Offset,
+ bool StopAtNul) {
+ // If V is NULL then return false;
+ if (V == NULL) return false;
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+  // If the value is neither a GEP instruction nor a constant expression with
+  // a GEP, then return false because a ConstantArray can't be reached any
+  // other way.
+ const User *GEP = 0;
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast)
+ return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return false;
+ GEP = CE;
+ }
+
+ if (GEP) {
+ // Make sure the GEP has exactly three arguments.
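+    // (i.e. the pointer operand plus two indices, as in the common
+    //  "getelementptr [N x i8]* @str, i32 0, i32 Idx" pattern).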
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+    // Make sure the pointer being indexed points to an array of i8.
+ const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (FirstIdx == 0 || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+ StopAtNul);
+ }
+
+  // The GEP, whether a constant expression or an instruction, must reference a
+  // global variable that is a constant and is initialized. The referenced
+  // constant initializer is the array that we'll use for optimization.
+ const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+ const Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case
+ if (isa<ConstantAggregateZero>(GlobalInit)) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str.clear();
+ return true;
+ }
+
+  // Must be a ConstantArray
+ const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Traverse the constant array from 'Offset' which is the place the GEP refers
+ // to in the array.
+ Str.reserve(NumElts-Offset);
+ for (unsigned i = Offset; i != NumElts; ++i) {
+ const Constant *Elt = Array->getOperand(i);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return false;
+ if (StopAtNul && CI->isZero())
+ return true; // we found end of string, success!
+ Str += (char)CI->getZExtValue();
+ }
+
+ // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+  // If the value is neither a GEP instruction nor a constant expression with
+  // a GEP, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+  // The GEP, whether a constant expression or an instruction, must reference a
+  // global variable that is a constant and is initialized. The referenced
+  // constant initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GV->mayBeOverridden())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+  // Must be a ConstantArray
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // an empty string as a length.
+ return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and use it.
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+ if (!II) return false;
+
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Archive/Archive.cpp b/contrib/llvm/lib/Archive/Archive.cpp
new file mode 100644
index 000000000000..1eab27d3eba3
--- /dev/null
+++ b/contrib/llvm/lib/Archive/Archive.cpp
@@ -0,0 +1,261 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that are common to both reading and writing archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
+#include <memory>
+#include <cstring>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of the member's payload; use getSize() for that.
+unsigned
+ArchiveMember::getMemberSize() const {
+  // Basically it's the file size plus the header size
+ unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+ // If it has a long filename, include the name length
+ if (hasLongFilename())
+ result += path.str().length() + 1;
+
+  // If it's now an odd length, include the padding byte
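+  // (members are kept on even boundaries, so e.g. a 73-byte member occupies
+  // 74 bytes on disk and the next header starts at an even offset).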
+ if (result % 2 != 0 )
+ result++;
+
+ return result;
+}
+
+// This default constructor is only used by the ilist when it creates its
+// sentinel node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+ : parent(0), path("--invalid--"), flags(0), data(0)
+{
+ info.user = sys::Process::GetCurrentUserId();
+ info.group = sys::Process::GetCurrentGroupId();
+ info.mode = 0777;
+ info.fileSize = 0;
+ info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+ : parent(PAR), path(), flags(0), data(0)
+{
+}
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not replace an archive member with a non-existent file";
+ return true;
+ }
+
+ data = 0;
+ path = newFile;
+
+ // SVR4 symbol tables have an empty name
+ if (path.str() == ARFILE_SVR4_SYMTAB_NAME)
+ flags |= SVR4SymbolTableFlag;
+ else
+ flags &= ~SVR4SymbolTableFlag;
+
+ // BSD4.4 symbol tables have a special name
+ if (path.str() == ARFILE_BSD4_SYMTAB_NAME)
+ flags |= BSD4SymbolTableFlag;
+ else
+ flags &= ~BSD4SymbolTableFlag;
+
+ // LLVM symbol tables have a very specific name
+ if (path.str() == ARFILE_LLVM_SYMTAB_NAME)
+ flags |= LLVMSymbolTableFlag;
+ else
+ flags &= ~LLVMSymbolTableFlag;
+
+ // String table name
+ if (path.str() == ARFILE_STRTAB_NAME)
+ flags |= StringTableFlag;
+ else
+ flags &= ~StringTableFlag;
+
+ // If it has a slash then it has a path
+ bool hasSlash = path.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= HasPathFlag;
+ else
+ flags &= ~HasPathFlag;
+
+  // If it has a slash or it's over 15 chars, it uses the long filename format
+ if (hasSlash || path.str().length() > 15)
+ flags |= HasLongFilenameFlag;
+ else
+ flags &= ~HasLongFilenameFlag;
+
+ // Get the signature and status info
+ const char* signature = (const char*) data;
+ SmallString<4> magic;
+ if (!signature) {
+ sys::fs::get_magic(path.str(), magic.capacity(), magic);
+ signature = magic.c_str();
+ const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+ if (FSinfo)
+ info = *FSinfo;
+ else
+ return true;
+ }
+
+ // Determine what kind of file it is.
+ switch (sys::IdentifyFileType(signature,4)) {
+ case sys::Bitcode_FileType:
+ flags |= BitcodeFlag;
+ break;
+ default:
+ flags &= ~BitcodeFlag;
+ break;
+ }
+ return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename, LLVMContext& C)
+ : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+ symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mapfile = File.take();
+ base = mapfile->getBufferStart();
+ return false;
+}
+
+void Archive::cleanUpMemory() {
+ // Shutdown the file mapping
+ delete mapfile;
+ mapfile = 0;
+ base = 0;
+
+ // Forget the entire symbol table
+ symTab.clear();
+ symTabSize = 0;
+
+ firstFileOffset = 0;
+
+ // Free the foreign symbol table member
+ if (foreignST) {
+ delete foreignST;
+ foreignST = 0;
+ }
+
+ // Delete any Modules and ArchiveMember's we've allocated as a result of
+ // symbol table searches.
+ for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
+ delete I->second.first;
+ delete I->second.second;
+ }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+ cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module *M, std::vector<std::string>& symbols) {
+  // Loop over global variables
+  for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+       GI != GE; ++GI)
+ if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+ if (!GI->getName().empty())
+ symbols.push_back(GI->getName());
+
+ // Loop over functions
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+ if (!FI->getName().empty())
+ symbols.push_back(FI->getName());
+
+ // Loop over aliases
+ for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ if (AI->hasName())
+ symbols.push_back(AI->getName());
+ }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+ + ec.message();
+ return true;
+ }
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return true;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module.
+ delete M;
+ return true;
+}
+
+Module*
+llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ // Get the module.
+ OwningPtr<MemoryBuffer> Buffer(
+ MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
+
+ Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+ if (!M)
+ return 0;
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module. Note that it's the caller's responsibility to delete
+ // the Module.
+ return M;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveInternals.h b/contrib/llvm/lib/Archive/ArchiveInternals.h
new file mode 100644
index 000000000000..55684f7023d2
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,89 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n" ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1) ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/ " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME "// " ///< Name of string table
+#define ARFILE_PAD "\n" ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n" ///< fmag field magic #
+
+namespace llvm {
+
+ class LLVMContext;
+
+ /// The ArchiveMemberHeader structure is used internally for bitcode
+ /// archives.
+ /// The header precedes each file member in the archive. This structure is
+ /// defined using character arrays for direct and correct interpretation
+  /// regardless of the endianness of the machine that produced it.
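+  /// The layout is the standard 60-byte ar(1) member header: 16+12+6+6+8+10+2
+  /// ASCII bytes with no padding between fields.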
+ /// @brief Archive File Member Header
+ class ArchiveMemberHeader {
+ /// @name Data
+ /// @{
+ public:
+ char name[16]; ///< Name of the file member.
+ char date[12]; ///< File date, decimal seconds since Epoch
+ char uid[6]; ///< user id in ASCII decimal
+ char gid[6]; ///< group id in ASCII decimal
+ char mode[8]; ///< file mode in ASCII octal
+ char size[10]; ///< file size in ASCII decimal
+    char fmag[2];  ///< Always contains ARFILE_MEMBER_MAGIC
+
+ /// @}
+ /// @name Methods
+ /// @{
+ public:
+ void init() {
+ memset(name,' ',16);
+ memset(date,' ',12);
+ memset(uid,' ',6);
+ memset(gid,' ',6);
+ memset(mode,' ',8);
+ memset(size,' ',10);
+ fmag[0] = '`';
+ fmag[1] = '\n';
+ }
+
+ bool checkSignature() {
+ return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+ }
+ };
+
+ // Get just the externally visible defined symbols from the bitcode
+ bool GetBitcodeSymbols(const sys::Path& fName,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+
+ Module* GetBitcodeSymbols(const char *Buffer, unsigned Length,
+ const std::string& ModuleID,
+ LLVMContext& Context,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
diff --git a/contrib/llvm/lib/Archive/ArchiveReader.cpp b/contrib/llvm/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 000000000000..eef6fe0b1c1d
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,630 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reads standard unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Module.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
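+/// (each byte supplies 7 low-order bits, least-significant group first, and a
+/// set high bit means another byte follows; e.g. 0xAC 0x02 decodes to 300).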
+static inline unsigned readInteger(const char*&At, const char*End) {
+ unsigned Shift = 0;
+ unsigned Result = 0;
+
+ do {
+ if (At == End)
+ return Result;
+ Result |= (unsigned)((*At++) & 0x7F) << Shift;
+ Shift += 7;
+ } while (At[-1] & 0x80);
+ return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
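+// The table is a flat sequence of entries, each a vbr-encoded file offset
+// followed by a vbr-encoded name length and then the symbol name bytes.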
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+ const char* At = (const char*) data;
+ const char* End = At + size;
+ while (At < End) {
+ unsigned offset = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab offset!";
+ return false;
+ }
+ unsigned length = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab length!";
+ return false;
+ }
+ if (At + length > End) {
+ if (error)
+ *error = "Malformed symbol table: length not consistent with size";
+ return false;
+ }
+ // we don't care if it can't be inserted (duplicate entry)
+ symTab.insert(std::make_pair(std::string(At, length), offset));
+ At += length;
+ }
+ symTabSize = size;
+ return true;
+}
+
+// This member function parses an ArchiveMemberHeader that At is presumed to
+// point to. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+ if (At + sizeof(ArchiveMemberHeader) >= End) {
+ if (error)
+ *error = "Unexpected end of file";
+ return 0;
+ }
+
+ // Cast archive member header
+ ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
+ At += sizeof(ArchiveMemberHeader);
+
+ // Extract the size and determine if the file is
+ // compressed or not (negative length).
+ int flags = 0;
+ int MemberSize = atoi(Hdr->size);
+ if (MemberSize < 0) {
+ flags |= ArchiveMember::CompressedFlag;
+ MemberSize = -MemberSize;
+ }
+
+ // Check the size of the member for sanity
+ if (At + MemberSize > End) {
+ if (error)
+ *error = "invalid member length in archive file";
+ return 0;
+ }
+
+ // Check the member signature
+ if (!Hdr->checkSignature()) {
+ if (error)
+ *error = "invalid file member signature";
+ return 0;
+ }
+
+ // Convert and check the member name
+ // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
+ // table. The special name "//" and 14 blanks is for a string table, used
+ // for long file names. This library doesn't generate either of those but
+ // it will accept them. If the name starts with #1/ and the remainder is
+ // digits, then those digits specify the length of the name that is
+ // stored immediately following the header. The special name
+  // "#_LLVM_SYM_TAB_#" identifies the symbol table for LLVM bitcode.
+ // Anything else is a regular, short filename that is terminated with
+ // a '/' and blanks.
+
+ std::string pathname;
+ switch (Hdr->name[0]) {
+ case '#':
+ if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
+ if (isdigit(Hdr->name[3])) {
+ unsigned len = atoi(&Hdr->name[3]);
+ const char *nulp = (const char *)memchr(At, '\0', len);
+ pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
+ At += len;
+ MemberSize -= len;
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ } else {
+ if (error)
+ *error = "invalid long filename";
+ return 0;
+ }
+ } else if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
+ // The member is using a long file name (>15 chars) format.
+ // This format is standard for 4.4BSD and Mac OSX operating
+ // systems. LLVM uses it similarly. In this format, the
+ // remainder of the name field (after #1/) specifies the
+        // length of the file name, which occupies the first bytes of
+ // the member's data. The pathname already has the #1/ stripped.
+ pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
+ flags |= ArchiveMember::LLVMSymbolTableFlag;
+ }
+ break;
+ case '/':
+ if (Hdr->name[1]== '/') {
+ if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
+ pathname.assign(ARFILE_STRTAB_NAME);
+ flags |= ArchiveMember::StringTableFlag;
+ } else {
+ if (error)
+ *error = "invalid string table name";
+ return 0;
+ }
+ } else if (Hdr->name[1] == ' ') {
+ if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
+ pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
+ flags |= ArchiveMember::SVR4SymbolTableFlag;
+ } else {
+ if (error)
+ *error = "invalid SVR4 symbol table name";
+ return 0;
+ }
+ } else if (isdigit(Hdr->name[1])) {
+ unsigned index = atoi(&Hdr->name[1]);
+ if (index < strtab.length()) {
+ const char* namep = strtab.c_str() + index;
+ const char* endp = strtab.c_str() + strtab.length();
+ const char* p = namep;
+ const char* last_p = p;
+ while (p < endp) {
+ if (*p == '\n' && *last_p == '/') {
+ pathname.assign(namep, last_p - namep);
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ break;
+ }
+ last_p = p;
+ p++;
+ }
+ if (p >= endp) {
+ if (error)
+              *error = "missing name terminator in string table";
+ return 0;
+ }
+ } else {
+ if (error)
+ *error = "name index beyond string table";
+ return 0;
+ }
+ }
+ break;
+ case '_':
+ if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
+ pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
+ flags |= ArchiveMember::BSD4SymbolTableFlag;
+ break;
+ }
+ /* FALL THROUGH */
+
+ default:
+ char* slash = (char*) memchr(Hdr->name, '/', 16);
+ if (slash == 0)
+ slash = Hdr->name + 16;
+ pathname.assign(Hdr->name, slash - Hdr->name);
+ break;
+ }
+
+ // Determine if this is a bitcode file
+ switch (sys::IdentifyFileType(At, 4)) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ flags &= ~ArchiveMember::BitcodeFlag;
+ break;
+ }
+
+ // Instantiate the ArchiveMember to be filled
+ ArchiveMember* member = new ArchiveMember(this);
+
+ // Fill in fields of the ArchiveMember
+ member->parent = this;
+ member->path.set(pathname);
+ member->info.fileSize = MemberSize;
+ member->info.modTime.fromEpochTime(atoi(Hdr->date));
+ unsigned int mode;
+ sscanf(Hdr->mode, "%o", &mode);
+ member->info.mode = mode;
+ member->info.user = atoi(Hdr->uid);
+ member->info.group = atoi(Hdr->gid);
+ member->flags = flags;
+ member->data = At;
+
+ return member;
+}
+
+bool
+Archive::checkSignature(std::string* error) {
+ // Check the magic string at file's header
+ if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
+ if (error)
+ *error = "invalid signature for an archive file";
+ return false;
+ }
+ return true;
+}
+
+// This function loads the entire archive and fully populates its ilist with
+// the members of the archive file. This is typically used in preparation for
+// editing the contents of the archive.
+bool
+Archive::loadArchive(std::string* error) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ if (!checkSignature(error))
+ return false;
+
+ At += 8; // Skip the magic string.
+
+ bool seenSymbolTable = false;
+ bool foundFirstFile = false;
+ while (At < End) {
+ // parse the member header
+ const char* Save = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // check if this is the foreign symbol table
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // We just save this but don't do anything special
+ // with it. It doesn't count as the "first file".
+ if (foreignST) {
+ // What? Multiple foreign symbol tables? Just chuck it
+ // and retain the last one found.
+ delete foreignST;
+ }
+ foreignST = mbr;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ } else if (mbr->isStringTable()) {
+ // Simply suck the entire string table into a string
+ // variable. This will be used to get the names of the
+ // members that use the "/ddd" format for their names
+ // (SVR4 style long names).
+ strtab.assign(At, mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ } else if (mbr->isLLVMSymbolTable()) {
+ // This is the LLVM symbol table for the archive. If we've seen it
+      // already, it's an error. Otherwise, parse the symbol table and move on.
+ if (seenSymbolTable) {
+ if (error)
+ *error = "invalid archive: multiple symbol tables";
+ return false;
+ }
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
+ return false;
+ seenSymbolTable = true;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr; // We don't need this member in the list of members.
+ } else {
+      // This is just a regular file. If it's the first one, save its offset.
+ // Otherwise just push it on the list and move on to the next file.
+ if (!foundFirstFile) {
+ firstFileOffset = Save - base;
+ foundFirstFile = true;
+ }
+ members.push_back(mbr);
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+ return true;
+}
+
+// Open and completely load the archive file.
+Archive*
+Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C,
+ std::string* ErrorMessage) {
+ std::auto_ptr<Archive> result ( new Archive(file, C));
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadArchive(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Get all the bitcode modules from the archive
+bool
+Archive::getAllModules(std::vector<Module*>& Modules,
+ std::string* ErrMessage) {
+
+ for (iterator I=begin(), E=end(); I != E; ++I) {
+ if (I->isBitcode()) {
+ std::string FullMemberName = archPath.str() +
+ "(" + I->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+
+ Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage);
+ delete Buffer;
+ if (!M)
+ return true;
+
+ Modules.push_back(M);
+ }
+ }
+ return false;
+}
+
+// Load just the symbol table from the archive file
+bool
+Archive::loadSymbolTable(std::string* ErrorMsg) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ // Make sure we're dealing with an archive
+ if (!checkSignature(ErrorMsg))
+ return false;
+
+ At += 8; // Skip signature
+
+ // Parse the first file member header
+ const char* FirstFile = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr)
+ return false;
+
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // Skip the foreign symbol table, we don't do anything with it
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+
+ // Read the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+ if (mbr->isStringTable()) {
+ // Process the string table entry
+ strtab.assign((const char*)mbr->getData(), mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Get the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr) {
+ delete mbr;
+ return false;
+ }
+ }
+
+  // See if it's the symbol table
+ if (mbr->isLLVMSymbolTable()) {
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
+ delete mbr;
+ return false;
+ }
+
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Can't be any more symtab headers so just advance
+ FirstFile = At;
+ } else {
+ // There's no symbol table in the file. We have to rebuild it from scratch
+ // because the intent of this method is to get the symbol table loaded so
+ // it can be searched efficiently.
+ // Add the member to the members list
+ members.push_back(mbr);
+ }
+
+ firstFileOffset = FirstFile - base;
+ return true;
+}
+
+// Open the archive and load just the symbol tables
+Archive* Archive::OpenAndLoadSymbols(const sys::Path& file,
+ LLVMContext& C,
+ std::string* ErrorMessage) {
+ std::auto_ptr<Archive> result ( new Archive(file, C) );
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadSymbolTable(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Look up one symbol in the symbol table and return the module that defines
+// that symbol.
+Module*
+Archive::findModuleDefiningSymbol(const std::string& symbol,
+ std::string* ErrMsg) {
+ SymTabType::iterator SI = symTab.find(symbol);
+ if (SI == symTab.end())
+ return 0;
+
+ // The symbol table was previously constructed assuming that the members were
+ // written without the symbol table header. Because VBR encoding is used, the
+ // values could not be adjusted to account for the offset of the symbol table
+ // because that could affect the size of the symbol table due to VBR encoding.
+ // We now have to account for this by adjusting the offset by the size of the
+ // symbol table and its header.
+ unsigned fileOffset =
+ SI->second + // offset in symbol-table-less file
+ firstFileOffset; // add offset to first "real" file in archive
+
+ // See if the module is already loaded
+ ModuleMap::iterator MI = modules.find(fileOffset);
+ if (MI != modules.end())
+ return MI->second.first;
+
+ // Module hasn't been loaded yet, we need to load it
+ const char* modptr = base + fileOffset;
+ ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+ ErrMsg);
+ if (!mbr)
+ return 0;
+
+ // Now, load the bitcode module to get the Module.
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
+ FullMemberName.c_str());
+
+ Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!m)
+ return 0;
+
+ modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr)));
+
+ return m;
+}
+
+// Look up multiple symbols in the symbol table and return a set of
+// Modules that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+ std::set<Module*>& result,
+ std::string* error) {
+ if (!mapfile || !base) {
+ if (error)
+ *error = "Empty archive invalid for finding modules defining symbols";
+ return false;
+ }
+
+ if (symTab.empty()) {
+    // We don't have a symbol table, so we must build it now, but let's also
+ // make sure that we populate the modules table as we do this to ensure
+ // that we don't load them twice when findModuleDefiningSymbol is called
+ // below.
+
+ // Get a pointer to the first file
+ const char* At = base + firstFileOffset;
+ const char* End = mapfile->getBufferEnd();
+
+ while ( At < End) {
+ // Compute the offset to be put in the symbol table
+ unsigned offset = At - base - firstFileOffset;
+
+ // Parse the file's header
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // If it contains symbols
+ if (mbr->isBitcode()) {
+ // Get the symbols
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" +
+ mbr->getPath().str() + ")";
+ Module* M =
+ GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
+ symbols, error);
+
+ if (M) {
+ // Insert the module's symbols into the symbol table
+ for (std::vector<std::string>::iterator I = symbols.begin(),
+ E=symbols.end(); I != E; ++I ) {
+ symTab.insert(std::make_pair(*I, offset));
+ }
+ // Insert the Module and the ArchiveMember into the table of
+ // modules.
+ modules.insert(std::make_pair(offset, std::make_pair(M, mbr)));
+ } else {
+ if (error)
+ *error = "Can't parse bitcode member: " +
+ mbr->getPath().str() + ": " + *error;
+ delete mbr;
+ return false;
+ }
+ }
+
+ // Go to the next file location
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+
+ // At this point we have a valid symbol table (one way or another) so we
+ // just use it to quickly find the symbols requested.
+
+ for (std::set<std::string>::iterator I=symbols.begin(),
+ E=symbols.end(); I != E;) {
+ // See if this symbol exists
+ Module* m = findModuleDefiningSymbol(*I,error);
+ if (m) {
+ // The symbol exists, insert the Module into our result, duplicates will
+ // be ignored.
+ result.insert(m);
+
+      // Remove the symbol now that it's been resolved, being careful to
+ // post-increment the iterator.
+ symbols.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ return true;
+}
+
+bool Archive::isBitcodeArchive() {
+ // Make sure the symTab has been loaded. In most cases this should have been
+ // done when the archive was constructed, but still, this is just in case.
+ if (symTab.empty())
+ if (!loadSymbolTable(0))
+ return false;
+
+ // Now that we know it's been loaded, return true
+ // if it has a size
+ if (symTab.size()) return true;
+
+ // We still can't be sure it isn't a bitcode archive
+ if (!loadArchive(0))
+ return false;
+
+ std::vector<Module *> Modules;
+ std::string ErrorMessage;
+
+ // Scan the archive, trying to load a bitcode member. We only load one to
+ // see if this works.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (!I->isBitcode())
+ continue;
+
+ std::string FullMemberName =
+ archPath.str() + "(" + I->getPath().str() + ")";
+
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()),
+ FullMemberName.c_str());
+ Module *M = ParseBitcodeFile(Buffer, Context);
+ delete Buffer;
+ if (!M)
+ return false; // Couldn't parse bitcode, not a bitcode archive.
+ delete M;
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
new file mode 100644
index 000000000000..8fcc7aa29cc8
--- /dev/null
+++ b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
@@ -0,0 +1,489 @@
+//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up an LLVM archive file (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
+#include <fstream>
+#include <ostream>
+#include <iomanip>
+using namespace llvm;
+
+// Write an integer using variable bit rate encoding. This saves a few bytes
+// per entry in the symbol table.
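+// (7 bits per byte, least-significant group first, with the high bit set on
+// every byte except the last; e.g. 300 is written as the two bytes 0xAC 0x02).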
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+ while (1) {
+ if (num < 0x80) { // done?
+ ARFile << (unsigned char)num;
+ return;
+ }
+
+ // Nope, we are bigger than a character, output the next 7 bits and set the
+ // high bit to say that there is more coming...
+ ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F));
+ num >>= 7; // Shift out 7 bits now...
+ }
+}
+
+// Compute how many bytes are taken by a given VBR encoded value. This is needed
+// to pre-compute the size of the symbol table.
+static inline unsigned numVbrBytes(unsigned num) {
+
+  // Note that the following nested ifs are somewhat equivalent to a binary
+  // search. We split the range in half by comparing against 2^14 first. This
+  // lets most reasonable values be handled in 2 comparisons, where a simple
+  // linear chain would need 1 comparison for small values but 4 for large
+  // ones. We expect to see file offsets in the 2^10 to 2^24 range and symbol
+  // lengths in the 2^0 to 2^8 range, so this approach is reasonable.
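+  // (For example, numVbrBytes(300) is 2 and numVbrBytes(100000) is 3.)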
+ if (num < 1<<14) {
+ if (num < 1<<7)
+ return 1;
+ else
+ return 2;
+ }
+ if (num < 1<<21)
+ return 3;
+
+ if (num < 1<<28)
+ return 4;
+ return 5; // anything >= 2^28 takes 5 bytes
+}
+
+// Create an empty archive.
+Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) {
+ Archive* result = new Archive(FilePath, C);
+ return result;
+}
+
+// Fill the ArchiveMemberHeader with the information from a member. If
+// TruncateNames is true, names are flattened to 15 chars or less. The sz field
+// is provided here instead of coming from the mbr because the member might be
+// stored compressed and the compressed size is not the ArchiveMember's size.
+// Furthermore compressed files have negative size fields to identify them as
+// compressed.
+bool
+Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
+ int sz, bool TruncateNames) const {
+
+ // Set the permissions mode, uid and gid
+ hdr.init();
+ char buffer[32];
+ sprintf(buffer, "%-8o", mbr.getMode());
+ memcpy(hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", mbr.getUser());
+ memcpy(hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", mbr.getGroup());
+ memcpy(hdr.gid,buffer,6);
+
+ // Set the last modification date
+ uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime();
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(hdr.date,buffer,12);
+
+ // Get rid of trailing blanks in the name
+ std::string mbrPath = mbr.getPath().str();
+ size_t mbrLen = mbrPath.length();
+ while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
+ mbrPath.erase(mbrLen-1,1);
+ mbrLen--;
+ }
+
+ // Set the name field in one of its various flavors.
+ bool writeLongName = false;
+ if (mbr.isStringTable()) {
+ memcpy(hdr.name,ARFILE_STRTAB_NAME,16);
+ } else if (mbr.isSVR4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16);
+ } else if (mbr.isBSD4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16);
+ } else if (mbr.isLLVMSymbolTable()) {
+ memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ } else if (TruncateNames) {
+ const char* nm = mbrPath.c_str();
+ unsigned len = mbrPath.length();
+ size_t slashpos = mbrPath.rfind('/');
+ if (slashpos != std::string::npos) {
+ nm += slashpos + 1;
+ len -= slashpos +1;
+ }
+ if (len > 15)
+ len = 15;
+ memcpy(hdr.name,nm,len);
+ hdr.name[len] = '/';
+ } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) {
+ memcpy(hdr.name,mbrPath.c_str(),mbrPath.length());
+ hdr.name[mbrPath.length()] = '/';
+ } else {
+ std::string nm = "#1/";
+ nm += utostr(mbrPath.length());
+ memcpy(hdr.name,nm.data(),nm.length());
+ if (sz < 0)
+ sz -= mbrPath.length();
+ else
+ sz += mbrPath.length();
+ writeLongName = true;
+ }
+
+ // Set the size field
+ if (sz < 0) {
+ buffer[0] = '-';
+ sprintf(&buffer[1],"%-9u",(unsigned)-sz);
+ } else {
+ sprintf(buffer, "%-10u", (unsigned)sz);
+ }
+ memcpy(hdr.size,buffer,10);
+
+ return writeLongName;
+}
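+
+// For illustration, assume a hypothetical member path "lib/Analysis/Foo.o"
+// (18 characters) with TruncateNames false. The name neither fits the plain
+// 15-character form nor is free of '/', so the header name field becomes
+// "#1/18", the size field is grown by 18, and writeLongName comes back true
+// so the caller writes the full path immediately after the header.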
+
+// Insert a file into the archive before some other member. This also takes care
+// of extracting the necessary flags and information from the file.
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+ std::string* ErrMsg) {
+ bool Exists;
+ if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
+ if (ErrMsg)
+ *ErrMsg = "Can not add a non-existent file to archive";
+ return true;
+ }
+
+ ArchiveMember* mbr = new ArchiveMember(this);
+
+ mbr->data = 0;
+ mbr->path = filePath;
+ const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+ if (!FSInfo) {
+ delete mbr;
+ return true;
+ }
+ mbr->info = *FSInfo;
+
+ unsigned flags = 0;
+ bool hasSlash = filePath.str().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= ArchiveMember::HasPathFlag;
+ if (hasSlash || filePath.str().length() > 15)
+ flags |= ArchiveMember::HasLongFilenameFlag;
+
+ sys::LLVMFileType type;
+ if (sys::fs::identify_magic(mbr->path.str(), type))
+ type = sys::Unknown_FileType;
+ switch (type) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ break;
+ }
+ mbr->flags = flags;
+ members.insert(where,mbr);
+ return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+ const ArchiveMember& member,
+ std::ofstream& ARFile,
+ bool CreateSymbolTable,
+ bool TruncateNames,
+ bool ShouldCompress,
+ std::string* ErrMsg
+) {
+
+ unsigned filepos = ARFile.tellp();
+ filepos -= 8;
+
+ // Get the data and its size either from the
+ // member's in-memory data or directly from the file.
+ size_t fSize = member.getSize();
+ const char *data = (const char*)member.getData();
+ MemoryBuffer *mFile = 0;
+ if (!data) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ mFile = File.take();
+ data = mFile->getBufferStart();
+ fSize = mFile->getBufferSize();
+ }
+
+ // Now that we have the data in memory, update the
+ // symbol table if it's a bitcode file.
+ if (CreateSymbolTable && member.isBitcode()) {
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ + ")";
+ Module* M =
+ GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
+
+ // If the bitcode parsed successfully
+ if (M) {
+ for (std::vector<std::string>::iterator SI = symbols.begin(),
+ SE = symbols.end(); SI != SE; ++SI) {
+
+ std::pair<SymTabType::iterator,bool> Res =
+ symTab.insert(std::make_pair(*SI,filepos));
+
+ if (Res.second) {
+ symTabSize += SI->length() +
+ numVbrBytes(SI->length()) +
+ numVbrBytes(filepos);
+ }
+ }
+ // We don't need this module any more.
+ delete M;
+ } else {
+ delete mFile;
+ if (ErrMsg)
+ *ErrMsg = "Can't parse bitcode member: " + member.getPath().str()
+ + ": " + *ErrMsg;
+ return true;
+ }
+ }
+
+ int hdrSize = fSize;
+
+ // Compute the fields of the header
+ ArchiveMemberHeader Hdr;
+ bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
+
+ // Write header to archive file
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+ // Write the long filename if it's long
+ if (writeLongName) {
+ ARFile.write(member.getPath().str().data(),
+ member.getPath().str().length());
+ }
+
+ // Write the (possibly compressed) member's content to the file.
+ ARFile.write(data,fSize);
+
+ // Make sure the member is an even length
+ if ((ARFile.tellp() & 1) == 1)
+ ARFile << ARFILE_PAD;
+
+ // Close the mapped file if it was opened
+ delete mFile;
+ return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
+void
+Archive::writeSymbolTable(std::ofstream& ARFile) {
+
+ // Construct the symbol table's header
+ ArchiveMemberHeader Hdr;
+ Hdr.init();
+ memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime();
+ char buffer[32];
+ sprintf(buffer, "%-8o", 0644);
+ memcpy(Hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId());
+ memcpy(Hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId());
+ memcpy(Hdr.gid,buffer,6);
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(Hdr.date,buffer,12);
+ sprintf(buffer,"%-10u",symTabSize);
+ memcpy(Hdr.size,buffer,10);
+
+ // Write the header
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+#ifndef NDEBUG
+ // Save the starting position of the symbol table's data content.
+ unsigned startpos = ARFile.tellp();
+#endif
+
+ // Write out the symbols sequentially
+ for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end();
+ I != E; ++I)
+ {
+ // Write out the file index
+ writeInteger(I->second, ARFile);
+ // Write out the length of the symbol
+ writeInteger(I->first.length(), ARFile);
+ // Write out the symbol
+ ARFile.write(I->first.data(), I->first.length());
+ }
+
+#ifndef NDEBUG
+ // Now that we're done with the symbol table, get the ending file position
+ unsigned endpos = ARFile.tellp();
+#endif
+
+ // Make sure that the amount we wrote is what we pre-computed. This is
+ // critical for file integrity purposes.
+ assert(endpos - startpos == symTabSize && "Invalid symTabSize computation");
+
+ // Make sure the symbol table is even sized
+ if (symTabSize % 2 != 0)
+ ARFile << ARFILE_PAD;
+}
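+
+// For illustration, the symbol table payload written above is a sequence of
+// (VBR file offset, VBR name length, raw name bytes) entries. A symbol "main"
+// whose member is recorded at file offset 300 takes 2 + 1 + 4 = 7 bytes:
+// 0xAC 0x02 for the offset, 0x04 for the length, then 'm' 'a' 'i' 'n'.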
+
+// Write the entire archive to the file specified when the archive was created.
+// This writes to a temporary file first. Options are for creating a symbol
+// table, flattening the file names (no directories, 15 chars max) and
+// compressing each archive member.
+bool
+Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
+ std::string* ErrMsg)
+{
+ // Make sure the caller hasn't opened the file without loading it and is now
+ // trying to write it, which would wipe out the existing archive.
+ if (members.empty() && mapfile && mapfile->getBufferSize() > 8) {
+ if (ErrMsg)
+ *ErrMsg = "Can't write an archive not opened for writing";
+ return true;
+ }
+
+ // Create a temporary file to store the archive in
+ sys::Path TmpArchive = archPath;
+ if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+
+ // Make sure the temporary gets removed if we crash
+ sys::RemoveFileOnSignal(TmpArchive);
+
+ // Create archive file for output.
+ std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
+ std::ios::binary;
+ std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
+
+ // Check for errors opening or creating archive file.
+ if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + archPath.str();
+ return true;
+ }
+
+ // If we're creating a symbol table, reset it now
+ if (CreateSymbolTable) {
+ symTabSize = 0;
+ symTab.clear();
+ }
+
+ // Write magic string to archive.
+ ArchiveFile << ARFILE_MAGIC;
+
+ // Loop over all member files, and write them out. Note that this also
+ // builds the symbol table, symTab.
+ for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
+ if (writeMember(*I, ArchiveFile, CreateSymbolTable,
+ TruncateNames, Compress, ErrMsg)) {
+ TmpArchive.eraseFromDisk();
+ ArchiveFile.close();
+ return true;
+ }
+ }
+
+ // Close archive file.
+ ArchiveFile.close();
+
+ // Write the symbol table
+ if (CreateSymbolTable) {
+ // At this point we have written a file that is a legal archive but it
+ // doesn't have a symbol table in it. To aid in faster reading and to
+ // ensure compatibility with other archivers we need to put the symbol
+ // table first in the file. Unfortunately, this means mapping the file
+ // we just wrote back in and copying it to the destination file.
+ sys::Path FinalFilePath = archPath;
+
+ // Map in the archive we just wrote.
+ {
+ OwningPtr<MemoryBuffer> arch;
+ if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
+ const char* base = arch->getBufferStart();
+
+ // Open another temporary file in order to avoid invalidating the
+ // mmapped data
+ if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+ sys::RemoveFileOnSignal(FinalFilePath);
+
+ std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+ if (!FinalFile.is_open() || FinalFile.bad()) {
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+ return true;
+ }
+
+ // Write the file magic number
+ FinalFile << ARFILE_MAGIC;
+
+ // If there is a foreign symbol table, put it into the file now. Most
+ // ar(1) implementations require the symbol table to be first but llvm-ar
+ // can deal with it being after a foreign symbol table. This ensures
+ // compatibility with other ar(1) implementations as well as allowing the
+ // archive to store both native .o and LLVM .bc files, both indexed.
+ if (foreignST) {
+ if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
+ FinalFile.close();
+ TmpArchive.eraseFromDisk();
+ return true;
+ }
+ }
+
+ // Put out the LLVM symbol table now.
+ writeSymbolTable(FinalFile);
+
+ // Copy the temporary file contents being sure to skip the file's magic
+ // number.
+ FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
+ arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
+
+ // Close up shop
+ FinalFile.close();
+ } // free arch.
+
+ // Move the final file over top of TmpArchive
+ if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+ return true;
+ }
+
+ // Before we replace the actual archive, we need to forget all the
+ // members, since they point to data in that old archive. We need to do
+ // this because we cannot replace an open file on Windows.
+ cleanUpMemory();
+
+ if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+ return true;
+
+ // Set correct read and write permissions after temporary file is moved
+ // to final destination path.
+ if (archPath.makeReadableOnDisk(ErrMsg))
+ return true;
+ if (archPath.makeWriteableOnDisk(ErrMsg))
+ return true;
+
+ return false;
+}
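+
+// For illustration, when CreateSymbolTable is set the file produced above is
+// laid out as: the archive magic string (ARFILE_MAGIC), then any foreign
+// symbol table member, then the LLVM symbol table member, and then every
+// ordinary member copied over from the first temporary file.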
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
new file mode 100644
index 000000000000..3c63106e8c3b
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -0,0 +1,830 @@
+//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLLexer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
+ ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions.
+//===----------------------------------------------------------------------===//
+
+// atoull - Convert an ascii string of decimal digits into the unsigned long
+// long representation... this does not have to do input error checking,
+// because we know that the input will be matched by a suitable regex...
+//
+uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; Buffer++) {
+ uint64_t OldRes = Result;
+ Result *= 10;
+ Result += *Buffer-'0';
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; ++Buffer) {
+ uint64_t OldRes = Result;
+ Result *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Result += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Result += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Result += C-'a'+10;
+
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+void LLLexer::HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ Pair[1] = 0;
+ for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
+/// { low64, high16 } as usual for an APInt.
+void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[1] = 0;
+ for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
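+
+// For illustration, assuming the usual x87 extended layout: the 20 hexits for
+// the long double 1.0 are 3FFF8000000000000000, which the routine above
+// splits into Pair[1] = 0x3FFF (sign and exponent) and
+// Pair[0] = 0x8000000000000000 (the 64-bit significand).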
+
+// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
+// appropriate character.
+static void UnEscapeLexed(std::string &Str) {
+ if (Str.empty()) return;
+
+ char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
+ char *BOut = Buffer;
+ for (char *BIn = Buffer; BIn != EndBuffer; ) {
+ if (BIn[0] == '\\') {
+ if (BIn < EndBuffer-1 && BIn[1] == '\\') {
+ *BOut++ = '\\'; // Two \ becomes one
+ BIn += 2;
+ } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
+ char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
+ *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
+ BIn[3] = Tmp; // Restore character
+ BIn += 3; // Skip over handled chars
+ ++BOut;
+ } else {
+ *BOut++ = *BIn++;
+ }
+ } else {
+ *BOut++ = *BIn++;
+ }
+ }
+ Str.resize(BOut-Buffer);
+}
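+
+// For illustration: given the lexed text a\62c, the routine above produces
+// abc, since 0x62 is 'b'; a doubled backslash collapses to a single one, and
+// a backslash not followed by two hex digits is copied through unchanged.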
+
+/// isLabelChar - Return true for [-a-zA-Z$._0-9].
+static bool isLabelChar(char C) {
+ return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
+}
+
+
+/// isLabelTail - If this pointer points at a valid label tail, return the
+/// pointer just past the terminating ':'; otherwise return null.
+static const char *isLabelTail(const char *CurPtr) {
+ while (1) {
+ if (CurPtr[0] == ':') return CurPtr+1;
+ if (!isLabelChar(CurPtr[0])) return 0;
+ ++CurPtr;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Lexer definition.
+//===----------------------------------------------------------------------===//
+
+LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+ LLVMContext &C)
+ : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
+ CurPtr = CurBuf->getBufferStart();
+}
+
+std::string LLLexer::getFilename() const {
+ return CurBuf->getBufferIdentifier();
+}
+
+int LLLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default: return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+
+lltok::Kind LLLexer::LexToken() {
+ TokStart = CurPtr;
+
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
+ return LexIdentifier();
+
+ return lltok::Error;
+ case EOF: return lltok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '+': return LexPositive();
+ case '@': return LexAt();
+ case '%': return LexPercent();
+ case '"': return LexQuote();
+ case '.':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ if (CurPtr[0] == '.' && CurPtr[1] == '.') {
+ CurPtr += 2;
+ return lltok::dotdotdot;
+ }
+ return lltok::Error;
+ case '$':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ return lltok::Error;
+ case ';':
+ SkipLineComment();
+ return LexToken();
+ case '!': return LexExclaim();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-':
+ return LexDigitOrNegative();
+ case '=': return lltok::equal;
+ case '[': return lltok::lsquare;
+ case ']': return lltok::rsquare;
+ case '{': return lltok::lbrace;
+ case '}': return lltok::rbrace;
+ case '<': return lltok::less;
+ case '>': return lltok::greater;
+ case '(': return lltok::lparen;
+ case ')': return lltok::rparen;
+ case ',': return lltok::comma;
+ case '*': return lltok::star;
+ case '\\': return lltok::backslash;
+ }
+}
+
+void LLLexer::SkipLineComment() {
+ while (1) {
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
+ return;
+ }
+}
+
+/// LexAt - Lex all tokens that start with an @ character:
+/// GlobalVar @\"[^\"]*\"
+/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// GlobalVarID @[0-9]+
+lltok::Kind LLLexer::LexAt() {
+ // Handle AtStringConstant: @\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in global variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::GlobalVar;
+ }
+ }
+ }
+
+ // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::GlobalVar;
+
+ // Handle GlobalVarID: @[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::GlobalID;
+ }
+
+ return lltok::Error;
+}
+
+/// ReadString - Read a string until the closing quote.
+lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
+ const char *Start = CurPtr;
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in string constant");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(Start, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return kind;
+ }
+ }
+}
+
+/// ReadVarName - Read the rest of a token containing a variable name.
+bool LLLexer::ReadVarName() {
+ const char *NameStart = CurPtr;
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+
+ StrVal.assign(NameStart, CurPtr);
+ return true;
+ }
+ return false;
+}
+
+/// LexPercent - Lex all tokens that start with a % character:
+/// LocalVar ::= %\"[^\"]*\"
+/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// LocalVarID ::= %[0-9]+
+lltok::Kind LLLexer::LexPercent() {
+ // Handle LocalVarName: %\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+ return ReadString(lltok::LocalVar);
+ }
+
+ // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::LocalVar;
+
+ // Handle LocalVarID: %[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::LocalVarID;
+ }
+
+ return lltok::Error;
+}
+
+/// LexQuote - Lex all tokens that start with a " character:
+/// QuoteLabel "[^"]+":
+/// StringConstant "[^"]*"
+lltok::Kind LLLexer::LexQuote() {
+ lltok::Kind kind = ReadString(lltok::StringConstant);
+ if (kind == lltok::Error || kind == lltok::Eof)
+ return kind;
+
+ if (CurPtr[0] == ':') {
+ ++CurPtr;
+ kind = lltok::LabelStr;
+ }
+
+ return kind;
+}
+
+/// LexExclaim:
+/// !foo
+/// !
+lltok::Kind LLLexer::LexExclaim() {
+ // Lex a metadata name as a MetadataVar.
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
+ ++CurPtr;
+
+ StrVal.assign(TokStart+1, CurPtr); // Skip !
+ UnEscapeLexed(StrVal);
+ return lltok::MetadataVar;
+ }
+ return lltok::exclaim;
+}
+
+/// LexIdentifier: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// IntegerType i[0-9]+
+/// Keyword sdiv, float, ...
+/// HexIntConstant [us]0x[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexIdentifier() {
+ const char *StartChar = CurPtr;
+ const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
+ const char *KeywordEnd = 0;
+
+ for (; isLabelChar(*CurPtr); ++CurPtr) {
+ // If we decide this is an integer, remember the end of the sequence.
+ if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
+ if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
+ }
+
+ // If we stopped due to a colon, this really is a label.
+ if (*CurPtr == ':') {
+ StrVal.assign(StartChar-1, CurPtr++);
+ return lltok::LabelStr;
+ }
+
+ // Otherwise, this wasn't a label. If this was valid as an integer type,
+ // return it.
+ if (IntEnd == 0) IntEnd = CurPtr;
+ if (IntEnd != StartChar) {
+ CurPtr = IntEnd;
+ uint64_t NumBits = atoull(StartChar, CurPtr);
+ if (NumBits < IntegerType::MIN_INT_BITS ||
+ NumBits > IntegerType::MAX_INT_BITS) {
+ Error("bitwidth for integer type out of range!");
+ return lltok::Error;
+ }
+ TyVal = IntegerType::get(Context, NumBits);
+ return lltok::Type;
+ }
+
+ // Otherwise, this was a letter sequence. See which keyword this is.
+ if (KeywordEnd == 0) KeywordEnd = CurPtr;
+ CurPtr = KeywordEnd;
+ --StartChar;
+ unsigned Len = CurPtr-StartChar;
+#define KEYWORD(STR) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
+ return lltok::kw_##STR;
+
+ KEYWORD(true); KEYWORD(false);
+ KEYWORD(declare); KEYWORD(define);
+ KEYWORD(global); KEYWORD(constant);
+
+ KEYWORD(private);
+ KEYWORD(linker_private);
+ KEYWORD(linker_private_weak);
+ KEYWORD(linker_private_weak_def_auto);
+ KEYWORD(internal);
+ KEYWORD(available_externally);
+ KEYWORD(linkonce);
+ KEYWORD(linkonce_odr);
+ KEYWORD(weak);
+ KEYWORD(weak_odr);
+ KEYWORD(appending);
+ KEYWORD(dllimport);
+ KEYWORD(dllexport);
+ KEYWORD(common);
+ KEYWORD(default);
+ KEYWORD(hidden);
+ KEYWORD(protected);
+ KEYWORD(unnamed_addr);
+ KEYWORD(extern_weak);
+ KEYWORD(external);
+ KEYWORD(thread_local);
+ KEYWORD(zeroinitializer);
+ KEYWORD(undef);
+ KEYWORD(null);
+ KEYWORD(to);
+ KEYWORD(tail);
+ KEYWORD(target);
+ KEYWORD(triple);
+ KEYWORD(deplibs);
+ KEYWORD(datalayout);
+ KEYWORD(volatile);
+ KEYWORD(nuw);
+ KEYWORD(nsw);
+ KEYWORD(exact);
+ KEYWORD(inbounds);
+ KEYWORD(align);
+ KEYWORD(addrspace);
+ KEYWORD(section);
+ KEYWORD(alias);
+ KEYWORD(module);
+ KEYWORD(asm);
+ KEYWORD(sideeffect);
+ KEYWORD(alignstack);
+ KEYWORD(gc);
+
+ KEYWORD(ccc);
+ KEYWORD(fastcc);
+ KEYWORD(coldcc);
+ KEYWORD(x86_stdcallcc);
+ KEYWORD(x86_fastcallcc);
+ KEYWORD(x86_thiscallcc);
+ KEYWORD(arm_apcscc);
+ KEYWORD(arm_aapcscc);
+ KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(msp430_intrcc);
+ KEYWORD(ptx_kernel);
+ KEYWORD(ptx_device);
+
+ KEYWORD(cc);
+ KEYWORD(c);
+
+ KEYWORD(signext);
+ KEYWORD(zeroext);
+ KEYWORD(inreg);
+ KEYWORD(sret);
+ KEYWORD(nounwind);
+ KEYWORD(noreturn);
+ KEYWORD(noalias);
+ KEYWORD(nocapture);
+ KEYWORD(byval);
+ KEYWORD(nest);
+ KEYWORD(readnone);
+ KEYWORD(readonly);
+ KEYWORD(uwtable);
+
+ KEYWORD(inlinehint);
+ KEYWORD(noinline);
+ KEYWORD(alwaysinline);
+ KEYWORD(optsize);
+ KEYWORD(ssp);
+ KEYWORD(sspreq);
+ KEYWORD(noredzone);
+ KEYWORD(noimplicitfloat);
+ KEYWORD(naked);
+ KEYWORD(hotpatch);
+ KEYWORD(nonlazybind);
+
+ KEYWORD(type);
+ KEYWORD(opaque);
+
+ KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
+ KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
+ KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
+ KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
+
+ KEYWORD(x);
+ KEYWORD(blockaddress);
+#undef KEYWORD
+
+ // Keywords for types.
+#define TYPEKEYWORD(STR, LLVMTY) \
+ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
+ TyVal = LLVMTY; return lltok::Type; }
+ TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("float", Type::getFloatTy(Context));
+ TYPEKEYWORD("double", Type::getDoubleTy(Context));
+ TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
+ TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
+ TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
+ TYPEKEYWORD("label", Type::getLabelTy(Context));
+ TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
+ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+#undef TYPEKEYWORD
+
+ // Keywords for instructions.
+#define INSTKEYWORD(STR, Enum) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
+ UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+
+ INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
+ INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
+ INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
+ INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
+ INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
+ INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
+ INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
+ INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
+
+ INSTKEYWORD(phi, PHI);
+ INSTKEYWORD(call, Call);
+ INSTKEYWORD(trunc, Trunc);
+ INSTKEYWORD(zext, ZExt);
+ INSTKEYWORD(sext, SExt);
+ INSTKEYWORD(fptrunc, FPTrunc);
+ INSTKEYWORD(fpext, FPExt);
+ INSTKEYWORD(uitofp, UIToFP);
+ INSTKEYWORD(sitofp, SIToFP);
+ INSTKEYWORD(fptoui, FPToUI);
+ INSTKEYWORD(fptosi, FPToSI);
+ INSTKEYWORD(inttoptr, IntToPtr);
+ INSTKEYWORD(ptrtoint, PtrToInt);
+ INSTKEYWORD(bitcast, BitCast);
+ INSTKEYWORD(select, Select);
+ INSTKEYWORD(va_arg, VAArg);
+ INSTKEYWORD(ret, Ret);
+ INSTKEYWORD(br, Br);
+ INSTKEYWORD(switch, Switch);
+ INSTKEYWORD(indirectbr, IndirectBr);
+ INSTKEYWORD(invoke, Invoke);
+ INSTKEYWORD(unwind, Unwind);
+ INSTKEYWORD(unreachable, Unreachable);
+
+ INSTKEYWORD(alloca, Alloca);
+ INSTKEYWORD(load, Load);
+ INSTKEYWORD(store, Store);
+ INSTKEYWORD(getelementptr, GetElementPtr);
+
+ INSTKEYWORD(extractelement, ExtractElement);
+ INSTKEYWORD(insertelement, InsertElement);
+ INSTKEYWORD(shufflevector, ShuffleVector);
+ INSTKEYWORD(extractvalue, ExtractValue);
+ INSTKEYWORD(insertvalue, InsertValue);
+#undef INSTKEYWORD
+
+ // Check for [us]0x[0-9A-Fa-f]+, which are hexadecimal constants generated by
+ // the CFE to avoid forcing it to deal with 64-bit numbers.
+ if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
+ TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
+ int len = CurPtr-TokStart-3;
+ uint32_t bits = len * 4;
+ APInt Tmp(bits, StringRef(TokStart+3, len), 16);
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < bits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
+ return lltok::APSInt;
+ }
+
+ // If this is "cc1234", return this as just "cc".
+ if (TokStart[0] == 'c' && TokStart[1] == 'c') {
+ CurPtr = TokStart+2;
+ return lltok::kw_cc;
+ }
+
+ // Finally, if this isn't known, return an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+}
+
+
+/// Lex0x: Handle productions that start with 0x, knowing that it matches and
+/// that this is not a label:
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::Lex0x() {
+ CurPtr = TokStart + 2;
+
+ char Kind;
+ if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
+ Kind = *CurPtr++;
+ } else {
+ Kind = 'J';
+ }
+
+ if (!isxdigit(CurPtr[0])) {
+ // Bad token, return it as an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ if (Kind == 'J') {
+ // HexFPConstant - Floating point constant represented in IEEE format as a
+ // hexadecimal number for when exponential notation is not precise enough.
+ // Float and double only.
+ APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+ return lltok::APFloat;
+ }
+
+ uint64_t Pair[2];
+ switch (Kind) {
+ default: llvm_unreachable("Unknown kind!");
+ case 'K':
+ // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
+ FP80HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(80, 2, Pair));
+ return lltok::APFloat;
+ case 'L':
+ // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair), true);
+ return lltok::APFloat;
+ case 'M':
+ // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair));
+ return lltok::APFloat;
+ }
+}
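+
+// For illustration: the token 0x3FF0000000000000 goes down the 'J' path above
+// and yields the double 1.0, since those 16 hexits are exactly the IEEE-754
+// bit pattern that BitsToDouble reinterprets.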
+
+/// LexDigitOrNegative: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// NInteger -[0-9]+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+/// PInteger [0-9]+
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
+lltok::Kind LLLexer::LexDigitOrNegative() {
+ // If the token starts with '-' and the next character is not a digit, this
+ // is probably a label.
+ if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
+ // Okay, this is not a number after the -, it's probably a label.
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+
+ return lltok::Error;
+ }
+
+ // At this point, it is either a label, int or fp constant.
+
+ // Skip digits, we have at least one.
+ for (; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ // Check to see if this really is a label after all, e.g. "-1:".
+ if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+ }
+
+ // If the next character is a '.', then it is an fp value; otherwise it's an
+ // integer.
+ if (CurPtr[0] != '.') {
+ if (TokStart[0] == '0' && TokStart[1] == 'x')
+ return Lex0x();
+ unsigned Len = CurPtr-TokStart;
+ uint32_t numBits = ((Len * 64) / 19) + 2;
+ APInt Tmp(numBits, StringRef(TokStart, Len), 10);
+ if (TokStart[0] == '-') {
+ uint32_t minBits = Tmp.getMinSignedBits();
+ if (minBits > 0 && minBits < numBits)
+ Tmp = Tmp.trunc(minBits);
+ APSIntVal = APSInt(Tmp, false);
+ } else {
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < numBits)
+ Tmp = Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, true);
+ }
+ return lltok::APSInt;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
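+
+// A note on the numBits estimate above: 64 bits cover roughly 19 decimal
+// digits, so (Len * 64) / 19 bits is enough for any Len-digit literal, and
+// the + 2 leaves slack for truncation and for the sign of negative values;
+// a 20-digit literal, for example, gets (20 * 64) / 19 + 2 = 69 bits.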
+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+lltok::Kind LLLexer::LexPositive() {
+ // A '+' must be followed by a digit; unlike '-', it cannot start a label.
+ if (!isdigit(CurPtr[0]))
+ return lltok::Error;
+
+ // Skip digits.
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ // At this point, we need a '.'.
+ if (CurPtr[0] != '.') {
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(std::atof(TokStart));
+ return lltok::APFloat;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h
new file mode 100644
index 000000000000..33b913572375
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLLexer.h
@@ -0,0 +1,93 @@
+//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ASMPARSER_LLLEXER_H
+#define LIB_ASMPARSER_LLLEXER_H
+
+#include "LLToken.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/SourceMgr.h"
+#include <string>
+
+namespace llvm {
+ class MemoryBuffer;
+ class Type;
+ class SMDiagnostic;
+ class LLVMContext;
+
+ class LLLexer {
+ const char *CurPtr;
+ MemoryBuffer *CurBuf;
+ SMDiagnostic &ErrorInfo;
+ SourceMgr &SM;
+ LLVMContext &Context;
+
+ // Information about the current token.
+ const char *TokStart;
+ lltok::Kind CurKind;
+ std::string StrVal;
+ unsigned UIntVal;
+ Type *TyVal;
+ APFloat APFloatVal;
+ APSInt APSIntVal;
+
+ std::string TheError;
+ public:
+ explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+ LLVMContext &C);
+ ~LLLexer() {}
+
+ lltok::Kind Lex() {
+ return CurKind = LexToken();
+ }
+
+ typedef SMLoc LocTy;
+ LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
+ lltok::Kind getKind() const { return CurKind; }
+ const std::string &getStrVal() const { return StrVal; }
+ Type *getTyVal() const { return TyVal; }
+ unsigned getUIntVal() const { return UIntVal; }
+ const APSInt &getAPSIntVal() const { return APSIntVal; }
+ const APFloat &getAPFloatVal() const { return APFloatVal; }
+
+
+ bool Error(LocTy L, const Twine &Msg) const;
+ bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+ std::string getFilename() const;
+
+ private:
+ lltok::Kind LexToken();
+
+ int getNextChar();
+ void SkipLineComment();
+ lltok::Kind ReadString(lltok::Kind kind);
+ bool ReadVarName();
+
+ lltok::Kind LexIdentifier();
+ lltok::Kind LexDigitOrNegative();
+ lltok::Kind LexPositive();
+ lltok::Kind LexAt();
+ lltok::Kind LexExclaim();
+ lltok::Kind LexPercent();
+ lltok::Kind LexQuote();
+ lltok::Kind Lex0x();
+
+ uint64_t atoull(const char *Buffer, const char *End);
+ uint64_t HexIntToVal(const char *Buffer, const char *End);
+ void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+ void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]);
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
new file mode 100644
index 000000000000..cfc31f3db8a7
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -0,0 +1,3742 @@
+//===-- LLParser.cpp - Parser Class ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLParser.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static std::string getTypeString(const Type *T) {
+ std::string Result;
+ raw_string_ostream Tmp(Result);
+ Tmp << *T;
+ return Tmp.str();
+}
+
+/// Run: module ::= toplevelentity*
+bool LLParser::Run() {
+ // Prime the lexer.
+ Lex.Lex();
+
+ return ParseTopLevelEntities() ||
+ ValidateEndOfModule();
+}
+
+/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// module.
+bool LLParser::ValidateEndOfModule() {
+ // Handle any instruction metadata forward references.
+ if (!ForwardRefInstMetadata.empty()) {
+ for (DenseMap<Instruction*, std::vector<MDRef> >::iterator
+ I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end();
+ I != E; ++I) {
+ Instruction *Inst = I->first;
+ const std::vector<MDRef> &MDList = I->second;
+
+ for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
+ unsigned SlotNo = MDList[i].MDSlot;
+
+ if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+ return Error(MDList[i].Loc, "use of undefined metadata '!" +
+ Twine(SlotNo) + "'");
+ Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
+ }
+ }
+ ForwardRefInstMetadata.clear();
+ }
+
+
+ // If there are entries in ForwardRefBlockAddresses at this point, they are
+ // references after the function was defined. Resolve those now.
+ while (!ForwardRefBlockAddresses.empty()) {
+ // Okay, we are referencing an already-parsed function, resolve them now.
+ Function *TheFn = 0;
+ const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
+ if (Fn.Kind == ValID::t_GlobalName)
+ TheFn = M->getFunction(Fn.StrVal);
+ else if (Fn.UIntVal < NumberedVals.size())
+ TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
+
+ if (TheFn == 0)
+ return Error(Fn.Loc, "unknown function referenced by blockaddress");
+
+ // Resolve all these references.
+ if (ResolveForwardRefBlockAddresses(TheFn,
+ ForwardRefBlockAddresses.begin()->second,
+ 0))
+ return true;
+
+ ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
+ }
+
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+ if (NumberedTypes[i].second.isValid())
+ return Error(NumberedTypes[i].second,
+ "use of undefined type '%" + Twine(i) + "'");
+
+ for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+ NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+ if (I->second.second.isValid())
+ return Error(I->second.second,
+ "use of undefined type named '" + I->getKey() + "'");
+
+ if (!ForwardRefVals.empty())
+ return Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '@" + ForwardRefVals.begin()->first +
+ "'");
+
+ if (!ForwardRefValIDs.empty())
+ return Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '@" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+
+ if (!ForwardRefMDNodes.empty())
+ return Error(ForwardRefMDNodes.begin()->second.second,
+ "use of undefined metadata '!" +
+ Twine(ForwardRefMDNodes.begin()->first) + "'");
+
+
+ // Look for intrinsic functions and CallInst that need to be upgraded
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+ UpgradeCallsToIntrinsic(FI++); // must be post-increment, as the function may be removed
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(M);
+ return false;
+}
+
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS) {
+ // Loop over all the references, resolving them.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+ BasicBlock *Res;
+ if (PFS) {
+ if (Refs[i].first.Kind == ValID::t_LocalName)
+ Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc);
+ else
+ Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc);
+ } else if (Refs[i].first.Kind == ValID::t_LocalID) {
+ return Error(Refs[i].first.Loc,
+ "cannot take address of numeric label after the function is defined");
+ } else {
+ Res = dyn_cast_or_null<BasicBlock>(
+ TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
+ }
+
+ if (Res == 0)
+ return Error(Refs[i].first.Loc,
+ "referenced value is not a basic block");
+
+ // Get the BlockAddress for this and update references to use it.
+ BlockAddress *BA = BlockAddress::get(TheFn, Res);
+ Refs[i].second->replaceAllUsesWith(BA);
+ Refs[i].second->eraseFromParent();
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Top-Level Entities
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ParseTopLevelEntities() {
+ while (1) {
+ switch (Lex.getKind()) {
+ default: return TokError("expected top-level entity");
+ case lltok::Eof: return false;
+ case lltok::kw_declare: if (ParseDeclare()) return true; break;
+ case lltok::kw_define: if (ParseDefine()) return true; break;
+ case lltok::kw_module: if (ParseModuleAsm()) return true; break;
+ case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
+ case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
+ case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
+ case lltok::LocalVar: if (ParseNamedType()) return true; break;
+ case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
+ case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+ case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
+ case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break;
+
+ // The global variable production with no name can have many different
+ // optional leading prefixes; the production is:
+ // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+ // OptionalAddrSpace OptionalUnnamedAddr
+ // ('constant'|'global') ...
+ case lltok::kw_private: // OptionalLinkage
+ case lltok::kw_linker_private: // OptionalLinkage
+ case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), Linkage, true, Visibility))
+ return true;
+ break;
+ }
+ case lltok::kw_default: // OptionalVisibility
+ case lltok::kw_hidden: // OptionalVisibility
+ case lltok::kw_protected: { // OptionalVisibility
+ unsigned Visibility;
+ if (ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", SMLoc(), 0, false, Visibility))
+ return true;
+ break;
+ }
+
+ case lltok::kw_thread_local: // OptionalThreadLocal
+ case lltok::kw_addrspace: // OptionalAddrSpace
+ case lltok::kw_constant: // GlobalType
+ case lltok::kw_global: // GlobalType
+ if (ParseGlobal("", SMLoc(), 0, false, 0)) return true;
+ break;
+ }
+ }
+}
+
+
+/// toplevelentity
+/// ::= 'module' 'asm' STRINGCONSTANT
+bool LLParser::ParseModuleAsm() {
+ assert(Lex.getKind() == lltok::kw_module);
+ Lex.Lex();
+
+ std::string AsmStr;
+ if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
+ ParseStringConstant(AsmStr)) return true;
+
+ M->appendModuleInlineAsm(AsmStr);
+ return false;
+}
+
+/// toplevelentity
+/// ::= 'target' 'triple' '=' STRINGCONSTANT
+/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
+bool LLParser::ParseTargetDefinition() {
+ assert(Lex.getKind() == lltok::kw_target);
+ std::string Str;
+ switch (Lex.Lex()) {
+ default: return TokError("unknown target property");
+ case lltok::kw_triple:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target triple") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setTargetTriple(Str);
+ return false;
+ case lltok::kw_datalayout:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target datalayout") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setDataLayout(Str);
+ return false;
+ }
+}
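+
+// For illustration, this production accepts module lines such as
+//   target triple = "x86_64-unknown-linux-gnu"
+//   target datalayout = "e-p:64:64:64"
+// where the quoted strings are placeholder examples; any string constant is
+// accepted here and simply recorded on the module.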
+
+/// toplevelentity
+/// ::= 'deplibs' '=' '[' ']'
+/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
+bool LLParser::ParseDepLibs() {
+ assert(Lex.getKind() == lltok::kw_deplibs);
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after deplibs") ||
+ ParseToken(lltok::lsquare, "expected '=' after deplibs"))
+ return true;
+
+ if (EatIfPresent(lltok::rsquare))
+ return false;
+
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+ }
+
+ return ParseToken(lltok::rsquare, "expected ']' at end of list");
+}
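+
+// For illustration, this production accepts dependent-library lists such as
+//   deplibs = [ "m", "pthread" ]
+// where the library names are placeholder examples; each string is simply
+// passed to Module::addLibrary.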
+
+/// ParseUnnamedType:
+/// ::= LocalVarID '=' 'type' type
+bool LLParser::ParseUnnamedType() {
+ LocTy TypeLoc = Lex.getLoc();
+ unsigned TypeID = Lex.getUIntVal();
+ Lex.Lex(); // eat LocalVarID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after '='"))
+ return true;
+
+ if (TypeID >= NumberedTypes.size())
+ NumberedTypes.resize(TypeID+1);
+
+ Type *Result = 0;
+ if (ParseStructDefinition(TypeLoc, "",
+ NumberedTypes[TypeID], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+ if (Entry.first)
+ return Error(TypeLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= LocalVar '=' 'type' type
+bool LLParser::ParseNamedType() {
+ std::string Name = Lex.getStrVal();
+ LocTy NameLoc = Lex.getLoc();
+ Lex.Lex(); // eat LocalVar.
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after name"))
+ return true;
+
+ Type *Result = 0;
+ if (ParseStructDefinition(NameLoc, Name,
+ NamedTypes[Name], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+ if (Entry.first)
+ return Error(NameLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
+ }
+
+ return false;
+}
+
+
+/// toplevelentity
+/// ::= 'declare' FunctionHeader
+bool LLParser::ParseDeclare() {
+ assert(Lex.getKind() == lltok::kw_declare);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, false);
+}
+
+/// toplevelentity
+/// ::= 'define' FunctionHeader '{' ...
+bool LLParser::ParseDefine() {
+ assert(Lex.getKind() == lltok::kw_define);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, true) ||
+ ParseFunctionBody(*F);
+}
+
+/// ParseGlobalType
+/// ::= 'constant'
+/// ::= 'global'
+bool LLParser::ParseGlobalType(bool &IsConstant) {
+ if (Lex.getKind() == lltok::kw_constant)
+ IsConstant = true;
+ else if (Lex.getKind() == lltok::kw_global)
+ IsConstant = false;
+ else {
+ IsConstant = false;
+ return TokError("expected 'global' or 'constant'");
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUnnamedGlobal:
+/// OptionalVisibility ALIAS ...
+/// OptionalLinkage OptionalVisibility ... -> global variable
+/// GlobalID '=' OptionalVisibility ALIAS ...
+/// GlobalID '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseUnnamedGlobal() {
+ unsigned VarID = NumberedVals.size();
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+
+ // Handle the GlobalID form.
+ if (Lex.getKind() == lltok::GlobalID) {
+ if (Lex.getUIntVal() != VarID)
+ return Error(Lex.getLoc(), "variable expected to be numbered '%" +
+ Twine(VarID) + "'");
+ Lex.Lex(); // eat GlobalID;
+
+ if (ParseToken(lltok::equal, "expected '=' after name"))
+ return true;
+ }
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+/// ParseNamedGlobal:
+/// GlobalVar '=' OptionalVisibility ALIAS ...
+/// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseNamedGlobal() {
+ assert(Lex.getKind() == lltok::GlobalVar);
+ LocTy NameLoc = Lex.getLoc();
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseToken(lltok::equal, "expected '=' in global variable") ||
+ ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+// MDString:
+// ::= '!' STRINGCONSTANT
+bool LLParser::ParseMDString(MDString *&Result) {
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ Result = MDString::get(Context, Str);
+ return false;
+}
+
+// MDNode:
+// ::= '!' MDNodeNumber
+//
+/// This version of ParseMDNodeID returns the slot number and null in the case
+/// of a forward reference.
+bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
+ // !{ ..., !42, ... }
+ if (ParseUInt32(SlotNo)) return true;
+
+ // Check existing MDNode.
+ if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+ Result = NumberedMetadata[SlotNo];
+ else
+ Result = 0;
+ return false;
+}
+
+bool LLParser::ParseMDNodeID(MDNode *&Result) {
+ // !{ ..., !42, ... }
+ unsigned MID = 0;
+ if (ParseMDNodeID(Result, MID)) return true;
+
+ // If not a forward reference, just return it now.
+ if (Result) return false;
+
+ // Otherwise, create MDNode forward reference.
+ MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+
+ if (NumberedMetadata.size() <= MID)
+ NumberedMetadata.resize(MID+1);
+ NumberedMetadata[MID] = FwdNode;
+ Result = FwdNode;
+ return false;
+}
+
+/// ParseNamedMetadata:
+/// !foo = !{ !1, !2 }
+bool LLParser::ParseNamedMetadata() {
+ assert(Lex.getKind() == lltok::MetadataVar);
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here"))
+ return true;
+
+ NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
+ if (Lex.getKind() != lltok::rbrace)
+ do {
+ if (ParseToken(lltok::exclaim, "Expected '!' here"))
+ return true;
+
+ MDNode *N = 0;
+ if (ParseMDNodeID(N)) return true;
+ NMD->addOperand(N);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ return false;
+}
+
+/// ParseStandaloneMetadata:
+/// !42 = !{...}
+bool LLParser::ParseStandaloneMetadata() {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+ unsigned MetadataID = 0;
+
+ LocTy TyLoc;
+ Type *Ty = 0;
+ SmallVector<Value *, 16> Elts;
+ if (ParseUInt32(MetadataID) ||
+ ParseToken(lltok::equal, "expected '=' here") ||
+ ParseType(Ty, TyLoc) ||
+ ParseToken(lltok::exclaim, "Expected '!' here") ||
+ ParseToken(lltok::lbrace, "Expected '{' here") ||
+ ParseMDNodeVector(Elts, NULL) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ MDNode *Init = MDNode::get(Context, Elts);
+
+ // See if this was forward referenced, if so, handle it.
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
+ FI = ForwardRefMDNodes.find(MetadataID);
+ if (FI != ForwardRefMDNodes.end()) {
+ MDNode *Temp = FI->second.first;
+ Temp->replaceAllUsesWith(Init);
+ MDNode::deleteTemporary(Temp);
+ ForwardRefMDNodes.erase(FI);
+
+ assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
+ } else {
+ if (MetadataID >= NumberedMetadata.size())
+ NumberedMetadata.resize(MetadataID+1);
+
+ if (NumberedMetadata[MetadataID] != 0)
+ return TokError("Metadata id is already used");
+ NumberedMetadata[MetadataID] = Init;
+ }
+
+ return false;
+}
+
+/// ParseAlias:
+/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
+/// Aliasee
+/// ::= TypeAndValue
+/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
+/// ::= 'getelementptr' 'inbounds'? '(' ... ')'
+///
+/// Everything through visibility has already been parsed.
+///
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+ unsigned Visibility) {
+ assert(Lex.getKind() == lltok::kw_alias);
+ Lex.Lex();
+ unsigned Linkage;
+ LocTy LinkageLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage))
+ return true;
+
+ if (Linkage != GlobalValue::ExternalLinkage &&
+ Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
+ Linkage != GlobalValue::InternalLinkage &&
+ Linkage != GlobalValue::PrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ return Error(LinkageLoc, "invalid linkage type for alias");
+
+ Constant *Aliasee;
+ LocTy AliaseeLoc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_bitcast &&
+ Lex.getKind() != lltok::kw_getelementptr) {
+ if (ParseGlobalTypeAndValue(Aliasee)) return true;
+ } else {
+ // A constantexpr aliasee is not prefixed with a type; its type is implied
+ // by the expression itself.
+ ValID ID;
+ if (ParseValID(ID)) return true;
+ if (ID.Kind != ValID::t_Constant)
+ return Error(AliaseeLoc, "invalid aliasee");
+ Aliasee = ID.ConstantVal;
+ }
+
+ if (!Aliasee->getType()->isPointerTy())
+ return Error(AliaseeLoc, "alias must have pointer type");
+
+ // Okay, create the alias but do not insert it into the module yet.
+ GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
+ (GlobalValue::LinkageTypes)Linkage, Name,
+ Aliasee);
+ GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+
+ // See if this value already exists in the symbol table. If so, it is either
+ // a redefinition or a definition of a forward reference.
+ if (GlobalValue *Val = M->getNamedValue(Name)) {
+ // See if this was a redefinition. If so, there is no entry in
+ // ForwardRefVals.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I == ForwardRefVals.end())
+ return Error(NameLoc, "redefinition of global named '@" + Name + "'");
+
+ // Otherwise, this was a definition of forward ref. Verify that types
+ // agree.
+ if (Val->getType() != GA->getType())
+ return Error(NameLoc,
+ "forward reference and definition of alias have different types");
+
+ // If they agree, just RAUW the old value with the alias and remove the
+ // forward ref info.
+ Val->replaceAllUsesWith(GA);
+ Val->eraseFromParent();
+ ForwardRefVals.erase(I);
+ }
+
+ // Insert into the module; we know its name won't collide now.
+ M->getAliasList().push_back(GA);
+ assert(GA->getName() == Name && "Should not be a name conflict!");
+
+ return false;
+}
+
+/// ParseGlobal
+/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+///
+/// Everything through visibility has been parsed already.
+///
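+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   @counter = internal global i32 0, align 4
+///   @msg = constant [3 x i8] c"hi\00", section ".rodata"
+///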
+bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+ unsigned Linkage, bool HasLinkage,
+ unsigned Visibility) {
+ unsigned AddrSpace;
+ bool ThreadLocal, IsConstant, UnnamedAddr;
+ LocTy UnnamedAddrLoc;
+ LocTy TyLoc;
+
+ Type *Ty = 0;
+ if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
+ ParseOptionalAddrSpace(AddrSpace) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseGlobalType(IsConstant) ||
+ ParseType(Ty, TyLoc))
+ return true;
+
+ // If the linkage is specified and is external, then no initializer is
+ // present.
+ Constant *Init = 0;
+ if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage &&
+ Linkage != GlobalValue::ExternalWeakLinkage &&
+ Linkage != GlobalValue::ExternalLinkage)) {
+ if (ParseGlobalValue(Ty, Init))
+ return true;
+ }
+
+ if (Ty->isFunctionTy() || Ty->isLabelTy())
+ return Error(TyLoc, "invalid type for global variable");
+
+ GlobalVariable *GV = 0;
+
+ // See if the global was forward referenced, if so, use the global.
+ if (!Name.empty()) {
+ if (GlobalValue *GVal = M->getNamedValue(Name)) {
+ if (!ForwardRefVals.erase(Name) || !isa<GlobalValue>(GVal))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ GV = cast<GlobalVariable>(GVal);
+ }
+ } else {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ GV = cast<GlobalVariable>(I->second.first);
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (GV == 0) {
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
+ Name, 0, false, AddrSpace);
+ } else {
+ if (GV->getType()->getElementType() != Ty)
+ return Error(TyLoc,
+ "forward reference and definition of global have different types");
+
+ // Move the forward-reference to the correct spot in the module.
+ M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
+ }
+
+ if (Name.empty())
+ NumberedVals.push_back(GV);
+
+ // Set the parsed properties on the global.
+ if (Init)
+ GV->setInitializer(Init);
+ GV->setConstant(IsConstant);
+ GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setThreadLocal(ThreadLocal);
+ GV->setUnnamedAddr(UnnamedAddr);
+
+ // Parse attributes on the global.
+ while (Lex.getKind() == lltok::comma) {
+ Lex.Lex();
+
+ if (Lex.getKind() == lltok::kw_section) {
+ Lex.Lex();
+ GV->setSection(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected global section string"))
+ return true;
+ } else if (Lex.getKind() == lltok::kw_align) {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment)) return true;
+ GV->setAlignment(Alignment);
+ } else {
+ TokError("unknown global variable property!");
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
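+/// For illustration (an assumed example, not taken from this file): in
+///   @p = global i32* @x
+///   @x = global i32 7
+/// the use of @x precedes its definition, so a declaration with external weak
+/// linkage is created as a placeholder and resolved when @x is later parsed.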
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
+ LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ // Look this name up in the normal function symbol table.
+ GlobalValue *Val =
+ cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
+ else
+ FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, "");
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Routines.
+//===----------------------------------------------------------------------===//
+
+/// ParseToken - If the current token has the specified kind, eat it and return
+/// success. Otherwise, emit the specified error and return failure.
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+ if (Lex.getKind() != T)
+ return TokError(ErrMsg);
+ Lex.Lex();
+ return false;
+}
+
+/// ParseStringConstant
+/// ::= StringConstant
+bool LLParser::ParseStringConstant(std::string &Result) {
+ if (Lex.getKind() != lltok::StringConstant)
+ return TokError("expected string constant");
+ Result = Lex.getStrVal();
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUInt32
+/// ::= uint32
+bool LLParser::ParseUInt32(unsigned &Val) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected integer");
+ uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
+ if (Val64 != unsigned(Val64))
+ return TokError("expected 32-bit integer (too large)");
+ Val = Val64;
+ Lex.Lex();
+ return false;
+}
+
+
+/// ParseOptionalAddrSpace
+/// := /*empty*/
+/// := 'addrspace' '(' uint32 ')'
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
+ AddrSpace = 0;
+ if (!EatIfPresent(lltok::kw_addrspace))
+ return false;
+ return ParseToken(lltok::lparen, "expected '(' in address space") ||
+ ParseUInt32(AddrSpace) ||
+ ParseToken(lltok::rparen, "expected ')' in address space");
+}
+
+/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
+/// indicates what kind of attribute list this is: 0: function arg, 1: result,
+/// 2: function attr.
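+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   define zeroext i8 @f(i32* nocapture %p) nounwind readonly { ... }
+/// combines a return attribute (zeroext, AttrKind 1), a parameter attribute
+/// (nocapture, AttrKind 0) and function attributes (nounwind readonly,
+/// AttrKind 2).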
+bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+ Attrs = Attribute::None;
+ LocTy AttrLoc = Lex.getLoc();
+
+ while (1) {
+ switch (Lex.getKind()) {
+ default: // End of attributes.
+ if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
+ return Error(AttrLoc, "invalid use of function-only attribute");
+
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ if (AttrKind == 2 &&
+ (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
+ return Error(AttrLoc, "invalid use of attribute on a function");
+
+ if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
+ return Error(AttrLoc, "invalid use of parameter-only attribute");
+
+ return false;
+ case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
+ case lltok::kw_signext: Attrs |= Attribute::SExt; break;
+ case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
+ case lltok::kw_sret: Attrs |= Attribute::StructRet; break;
+ case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break;
+ case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break;
+ case lltok::kw_byval: Attrs |= Attribute::ByVal; break;
+ case lltok::kw_nest: Attrs |= Attribute::Nest; break;
+
+ case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
+ case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
+ case lltok::kw_uwtable: Attrs |= Attribute::UWTable; break;
+ case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
+ case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
+ case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
+ case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break;
+ case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
+ case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
+ case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
+ case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
+ case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
+ case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
+ case lltok::kw_naked: Attrs |= Attribute::Naked; break;
+ case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
+ case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
+
+ case lltok::kw_alignstack: {
+ unsigned Alignment;
+ if (ParseOptionalStackAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);
+ continue;
+ }
+
+ case lltok::kw_align: {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+ continue;
+ }
+
+ }
+ Lex.Lex();
+ }
+}
+
+/// ParseOptionalLinkage
+/// ::= /*empty*/
+/// ::= 'private'
+/// ::= 'linker_private'
+/// ::= 'linker_private_weak'
+/// ::= 'linker_private_weak_def_auto'
+/// ::= 'internal'
+/// ::= 'weak'
+/// ::= 'weak_odr'
+/// ::= 'linkonce'
+/// ::= 'linkonce_odr'
+/// ::= 'available_externally'
+/// ::= 'appending'
+/// ::= 'dllexport'
+/// ::= 'common'
+/// ::= 'dllimport'
+/// ::= 'extern_weak'
+/// ::= 'external'
+bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
+ HasLinkage = false;
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::ExternalLinkage; return false;
+ case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
+ case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+ case lltok::kw_linker_private_weak:
+ Res = GlobalValue::LinkerPrivateWeakLinkage;
+ break;
+ case lltok::kw_linker_private_weak_def_auto:
+ Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ break;
+ case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
+ case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
+ case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
+ case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
+ case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ case lltok::kw_available_externally:
+ Res = GlobalValue::AvailableExternallyLinkage;
+ break;
+ case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
+ case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
+ case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
+ case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
+ case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
+ case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+ }
+ Lex.Lex();
+ HasLinkage = true;
+ return false;
+}
+
+/// ParseOptionalVisibility
+/// ::= /*empty*/
+/// ::= 'default'
+/// ::= 'hidden'
+/// ::= 'protected'
+///
+bool LLParser::ParseOptionalVisibility(unsigned &Res) {
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::DefaultVisibility; return false;
+ case lltok::kw_default: Res = GlobalValue::DefaultVisibility; break;
+ case lltok::kw_hidden: Res = GlobalValue::HiddenVisibility; break;
+ case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalCallingConv
+/// ::= /*empty*/
+/// ::= 'ccc'
+/// ::= 'fastcc'
+/// ::= 'coldcc'
+/// ::= 'x86_stdcallcc'
+/// ::= 'x86_fastcallcc'
+/// ::= 'x86_thiscallcc'
+/// ::= 'arm_apcscc'
+/// ::= 'arm_aapcscc'
+/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'msp430_intrcc'
+/// ::= 'ptx_kernel'
+/// ::= 'ptx_device'
+/// ::= 'cc' UINT
+///
+bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
+ switch (Lex.getKind()) {
+ default: CC = CallingConv::C; return false;
+ case lltok::kw_ccc: CC = CallingConv::C; break;
+ case lltok::kw_fastcc: CC = CallingConv::Fast; break;
+ case lltok::kw_coldcc: CC = CallingConv::Cold; break;
+ case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
+ case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
+ case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
+ case lltok::kw_arm_aapcs_vfpcc: CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
+ case lltok::kw_ptx_kernel: CC = CallingConv::PTX_Kernel; break;
+ case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break;
+ case lltok::kw_cc: {
+ unsigned ArbitraryCC;
+ Lex.Lex();
+ if (ParseUInt32(ArbitraryCC))
+ return true;
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseInstructionMetadata
+/// ::= !dbg !42 (',' !dbg !57)*
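+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   %v = load i32* %p, !dbg !7, !tbaa !2
+/// Each !name selects a metadata kind, and the trailing !N may be a forward
+/// reference; such references are queued in ForwardRefInstMetadata.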
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+ PerFunctionState *PFS) {
+ do {
+ if (Lex.getKind() != lltok::MetadataVar)
+ return TokError("expected metadata after comma");
+
+ std::string Name = Lex.getStrVal();
+ unsigned MDK = M->getMDKindID(Name.c_str());
+ Lex.Lex();
+
+ MDNode *Node;
+ SMLoc Loc = Lex.getLoc();
+
+ if (ParseToken(lltok::exclaim, "expected '!' here"))
+ return true;
+
+ // This code is similar to that of ParseMetadataValue, however it needs to
+ // have special-case code for a forward reference; see the comments on
+ // ForwardRefInstMetadata for details. Also, MDStrings are not supported
+ // at the top level here.
+ if (Lex.getKind() == lltok::lbrace) {
+ ValID ID;
+ if (ParseMetadataListValue(ID, PFS))
+ return true;
+ assert(ID.Kind == ValID::t_MDNode);
+ Inst->setMetadata(MDK, ID.MDNodeVal);
+ } else {
+ unsigned NodeID = 0;
+ if (ParseMDNodeID(Node, NodeID))
+ return true;
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
+ }
+
+ // If this is the end of the list, we're done.
+ } while (EatIfPresent(lltok::comma));
+ return false;
+}
+
+/// ParseOptionalAlignment
+/// ::= /* empty */
+/// ::= 'align' 4
+bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_align))
+ return false;
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "alignment is not a power of two");
+ if (Alignment > Value::MaximumAlignment)
+ return Error(AlignLoc, "huge alignments are not supported yet");
+ return false;
+}
+
+/// ParseOptionalCommaAlign
+/// ::=
+/// ::= ',' align 4
+///
+/// This returns with AteExtraComma set to true if it ate an excess comma at the
+/// end.
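+/// For illustration (assumed .ll syntax, not quoted from this file), in
+///   store i32 0, i32* %p, align 4, !nontemporal !1
+/// the final comma introduces instruction metadata rather than another
+/// property, which is what AteExtraComma reports back to the caller.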
+bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ // Metadata at the end is an early exit.
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+
+ if (Lex.getKind() != lltok::kw_align)
+ return Error(Lex.getLoc(), "expected metadata or 'align'");
+
+ if (ParseOptionalAlignment(Alignment)) return true;
+ }
+
+ return false;
+}
+
+/// ParseOptionalStackAlignment
+/// ::= /* empty */
+/// ::= 'alignstack' '(' 4 ')'
+bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_alignstack))
+ return false;
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(ParenLoc, "expected '('");
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(ParenLoc, "expected ')'");
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "stack alignment is not a power of two");
+ return false;
+}
+
+/// ParseIndexList - This parses the index list for an insert/extractvalue
+/// instruction. This sets AteExtraComma in the case where we eat an extra
+/// comma at the end of the line and find that it is followed by metadata.
+/// Clients that don't allow metadata can call the version of this function that
+/// only takes one argument.
+///
+/// ParseIndexList
+/// ::= (',' uint32)+
+///
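+/// For illustration (assumed .ll syntax, not quoted from this file), the
+/// ", 1, 0" in
+///   extractvalue {i32, {i32, i8}} %agg, 1, 0
+/// is the index list parsed here.
+///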
+bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+
+ if (Lex.getKind() != lltok::comma)
+ return TokError("expected ',' as start of index list");
+
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+ unsigned Idx = 0;
+ if (ParseUInt32(Idx)) return true;
+ Indices.push_back(Idx);
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Type Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseType - Parse a type.
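+/// For illustration (assumed .ll syntax, not quoted from this file), this
+/// accepts simple and suffixed forms alike, e.g. i32, i8*, i8 addrspace(1)*,
+/// [4 x float], <4 x i32>, {i32, i8*} and i32 (i8*, ...)*.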
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+ SMLoc TypeLoc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected type");
+ case lltok::Type:
+ // Type ::= 'float' | 'void' (etc)
+ Result = Lex.getTyVal();
+ Lex.Lex();
+ break;
+ case lltok::lbrace:
+ // Type ::= StructType
+ if (ParseAnonStructType(Result, false))
+ return true;
+ break;
+ case lltok::lsquare:
+ // Type ::= '[' ... ']'
+ Lex.Lex(); // eat the lsquare.
+ if (ParseArrayVectorType(Result, false))
+ return true;
+ break;
+ case lltok::less: // Either vector or packed struct.
+ // Type ::= '<' ... '>'
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ if (ParseAnonStructType(Result, true) ||
+ ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+ return true;
+ } else if (ParseArrayVectorType(Result, true))
+ return true;
+ break;
+ case lltok::LocalVar: {
+ // Type ::= %foo
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, Lex.getStrVal());
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+
+ case lltok::LocalVarID: {
+ // Type ::= %4
+ if (Lex.getUIntVal() >= NumberedTypes.size())
+ NumberedTypes.resize(Lex.getUIntVal()+1);
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, "");
+ Entry.second = Lex.getLoc();
+ }
+ Result = Entry.first;
+ Lex.Lex();
+ break;
+ }
+ }
+
+ // Parse the type suffixes.
+ while (1) {
+ switch (Lex.getKind()) {
+ // End of type.
+ default:
+ if (!AllowVoid && Result->isVoidTy())
+ return Error(TypeLoc, "void type only allowed for function results");
+ return false;
+
+ // Type ::= Type '*'
+ case lltok::star:
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid - use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ Result = PointerType::getUnqual(Result);
+ Lex.Lex();
+ break;
+
+ // Type ::= Type 'addrspace' '(' uint32 ')' '*'
+ case lltok::kw_addrspace: {
+ if (Result->isLabelTy())
+ return TokError("basic block pointers are invalid");
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid; use i8* instead");
+ if (!PointerType::isValidElementType(Result))
+ return TokError("pointer to this type is invalid");
+ unsigned AddrSpace;
+ if (ParseOptionalAddrSpace(AddrSpace) ||
+ ParseToken(lltok::star, "expected '*' in address space"))
+ return true;
+
+ Result = PointerType::get(Result, AddrSpace);
+ break;
+ }
+
+ // Type ::= Type '(' ArgTypeListI ')' OptFuncAttrs
+ case lltok::lparen:
+ if (ParseFunctionType(Result))
+ return true;
+ break;
+ }
+ }
+}
+
+/// ParseParameterList
+/// ::= '(' ')'
+/// ::= '(' Arg (',' Arg)* ')'
+/// Arg
+/// ::= Type OptionalAttributes Value OptionalAttributes
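+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   call void @f(i32 signext %x, i8* nocapture %p)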
+bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS) {
+ if (ParseToken(lltok::lparen, "expected '(' in call"))
+ return true;
+
+ while (Lex.getKind() != lltok::rparen) {
+ // If this isn't the first argument, we need a comma.
+ if (!ArgList.empty() &&
+ ParseToken(lltok::comma, "expected ',' in argument list"))
+ return true;
+
+ // Parse the argument.
+ LocTy ArgLoc;
+ Type *ArgTy = 0;
+ unsigned ArgAttrs1 = Attribute::None;
+ unsigned ArgAttrs2 = Attribute::None;
+ Value *V;
+ if (ParseType(ArgTy, ArgLoc))
+ return true;
+
+ // Parse the optional parameter attributes and the operand itself.
+ if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS))
+ return true;
+ ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
+ }
+
+ Lex.Lex(); // Lex the ')'.
+ return false;
+}
+
+
+
+/// ParseArgumentList - Parse the argument list for a function type or function
+/// prototype.
+/// ::= '(' ArgTypeListI ')'
+/// ArgTypeListI
+/// ::= /*empty*/
+/// ::= '...'
+/// ::= ArgTypeList ',' '...'
+/// ::= ArgType (',' ArgType)*
+///
+bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+ bool &isVarArg){
+ isVarArg = false;
+ assert(Lex.getKind() == lltok::lparen);
+ Lex.Lex(); // eat the (.
+
+ if (Lex.getKind() == lltok::rparen) {
+ // empty
+ } else if (Lex.getKind() == lltok::dotdotdot) {
+ isVarArg = true;
+ Lex.Lex();
+ } else {
+ LocTy TypeLoc = Lex.getLoc();
+ Type *ArgTy = 0;
+ unsigned Attrs;
+ std::string Name;
+
+ if (ParseType(ArgTy) ||
+ ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ if (!FunctionType::isValidArgumentType(ArgTy))
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+
+ while (EatIfPresent(lltok::comma)) {
+ // Handle ... at end of arg list.
+ if (EatIfPresent(lltok::dotdotdot)) {
+ isVarArg = true;
+ break;
+ }
+
+ // Otherwise must be an argument type.
+ TypeLoc = Lex.getLoc();
+ if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy->isVoidTy())
+ return Error(TypeLoc, "argument can not have void type");
+
+ if (Lex.getKind() == lltok::LocalVar) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ } else {
+ Name = "";
+ }
+
+ if (!ArgTy->isFirstClassType())
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+ }
+ }
+
+ return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+/// ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(Type *&Result) {
+ assert(Lex.getKind() == lltok::lparen);
+
+ if (!FunctionType::isValidReturnType(Result))
+ return TokError("invalid function return type");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ if (ParseArgumentList(ArgList, isVarArg))
+ return true;
+
+ // Reject names on the argument list.
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ if (!ArgList[i].Name.empty())
+ return Error(ArgList[i].Loc, "argument name invalid in function type");
+ if (ArgList[i].Attrs != 0)
+ return Error(ArgList[i].Loc,
+ "argument attributes invalid in function type");
+ }
+
+ SmallVector<Type*, 16> ArgListTy;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ArgListTy.push_back(ArgList[i].Ty);
+
+ Result = FunctionType::get(Result, ArgListTy, isVarArg);
+ return false;
+}
+
+/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// other structs.
+bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+ SmallVector<Type*, 8> Elts;
+ if (ParseStructBody(Elts)) return true;
+
+ Result = StructType::get(Context, Elts, Packed);
+ return false;
+}
+
+/// ParseStructDefinition - Parse a struct in a 'type' definition.
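+/// For illustration (assumed .ll syntax, not quoted from this file):
+///   %opaque = type opaque
+///   %node = type { i32, %node* }
+/// The recursive form is why a named StructType is created first and its body
+/// is filled in afterwards with setBody.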
+bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy) {
+ // If the type was already defined, diagnose the redefinition.
+ if (Entry.first && !Entry.second.isValid())
+ return Error(TypeLoc, "redefinition of type");
+
+ // If we have opaque, just return without filling in the definition for the
+ // struct. This counts as a definition as far as the .ll file goes.
+ if (EatIfPresent(lltok::kw_opaque)) {
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+ ResultTy = Entry.first;
+ return false;
+ }
+
+ // If the type starts with '<', then it is either a packed struct or a vector.
+ bool isPacked = EatIfPresent(lltok::less);
+
+ // If we don't have a struct, then we have a random type alias, which we
+ // accept for compatibility with old files. These types are not allowed to be
+ // forward referenced and not allowed to be recursive.
+ if (Lex.getKind() != lltok::lbrace) {
+ if (Entry.first)
+ return Error(TypeLoc, "forward references to non-struct type");
+
+ ResultTy = 0;
+ if (isPacked)
+ return ParseArrayVectorType(ResultTy, true);
+ return ParseType(ResultTy);
+ }
+
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+
+ StructType *STy = cast<StructType>(Entry.first);
+
+ SmallVector<Type*, 8> Body;
+ if (ParseStructBody(Body) ||
+ (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+ return true;
+
+ STy->setBody(Body, isPacked);
+ ResultTy = STy;
+ return false;
+}
+
+
+/// ParseStructBody: Handles packed and unpacked struct bodies; the '<' and
+/// '>' around a packed struct are parsed elsewhere.
+/// StructType
+/// ::= '{' '}'
+/// ::= '{' Type (',' Type)* '}'
+/// ::= '<' '{' '}' '>'
+/// ::= '<' '{' Type (',' Type)* '}' '>'
+bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex(); // Consume the '{'
+
+ // Handle the empty struct.
+ if (EatIfPresent(lltok::rbrace))
+ return false;
+
+ LocTy EltTyLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+ Body.push_back(Ty);
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ while (EatIfPresent(lltok::comma)) {
+ EltTyLoc = Lex.getLoc();
+ if (ParseType(Ty)) return true;
+
+ if (!StructType::isValidElementType(Ty))
+ return Error(EltTyLoc, "invalid element type for struct");
+
+ Body.push_back(Ty);
+ }
+
+ return ParseToken(lltok::rbrace, "expected '}' at end of struct");
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+/// Type
+/// ::= '[' APSINTVAL 'x' Types ']'
+/// ::= '<' APSINTVAL 'x' Types '>'
+bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+ Lex.getAPSIntVal().getBitWidth() > 64)
+ return TokError("expected number in address space");
+
+ LocTy SizeLoc = Lex.getLoc();
+ uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+ Lex.Lex();
+
+ if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+ return true;
+
+ LocTy TypeLoc = Lex.getLoc();
+ Type *EltTy = 0;
+ if (ParseType(EltTy)) return true;
+
+ if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+ "expected end of sequential type"))
+ return true;
+
+ if (isVector) {
+ if (Size == 0)
+ return Error(SizeLoc, "zero element vector is illegal");
+ if ((unsigned)Size != Size)
+ return Error(SizeLoc, "size too large for vector");
+ if (!VectorType::isValidElementType(EltTy))
+ return Error(TypeLoc, "vector element type must be fp or integer");
+ Result = VectorType::get(EltTy, unsigned(Size));
+ } else {
+ if (!ArrayType::isValidElementType(EltTy))
+ return Error(TypeLoc, "invalid array element type");
+ Result = ArrayType::get(EltTy, Size);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
+ int functionNumber)
+ : P(p), F(f), FunctionNumber(functionNumber) {
+
+ // Insert unnamed arguments into the NumberedVals list.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI)
+ if (!AI->hasName())
+ NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+ // If there were any forward referenced non-basicblock values, delete them.
+ for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+
+ for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(
+ UndefValue::get(I->second.first->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+}
+
+bool LLParser::PerFunctionState::FinishFunction() {
+ // Check to see if someone took the address of labels in this block.
+ if (!P.ForwardRefBlockAddresses.empty()) {
+ ValID FunctionID;
+ if (!F.getName().empty()) {
+ FunctionID.Kind = ValID::t_GlobalName;
+ FunctionID.StrVal = F.getName();
+ } else {
+ FunctionID.Kind = ValID::t_GlobalID;
+ FunctionID.UIntVal = FunctionNumber;
+ }
+
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
+ FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
+ if (FRBAI != P.ForwardRefBlockAddresses.end()) {
+ // Resolve all these references.
+ if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
+ return true;
+
+ P.ForwardRefBlockAddresses.erase(FRBAI);
+ }
+ }
+
+ if (!ForwardRefVals.empty())
+ return P.Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '%" + ForwardRefVals.begin()->first +
+ "'");
+ if (!ForwardRefValIDs.empty())
+ return P.Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '%" +
+ Twine(ForwardRefValIDs.begin()->first) + "'");
+ return false;
+}
+
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
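+/// For illustration (an assumed example, not taken from this file): a branch
+/// such as
+///   br label %next
+/// may mention %next before its block is parsed; a placeholder BasicBlock (or
+/// an Argument, for non-label types) is created and resolved later.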
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+ const Type *Ty, LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = F.getValueSymbolTable().lookup(Name);
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Name + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ // Don't make placeholders with invalid type.
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
+ else
+ FwdVal = new Argument(Ty, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
+ LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty->isLabelTy())
+ P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
+ getTypeString(Val->getType()) + "'");
+ return 0;
+ }
+
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty->isLabelTy())
+ FwdVal = BasicBlock::Create(F.getContext(), "", &F);
+ else
+ FwdVal = new Argument(Ty);
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+/// SetInstName - After an instruction is parsed and inserted into its
+/// basic block, this installs its name.
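+/// For illustration (an assumed example, not taken from this file): after
+///   %0 = add i32 %a, %b
+/// the next unnamed instruction must be %1; writing %5 instead triggers the
+/// "instruction expected to be numbered" error below.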
+bool LLParser::PerFunctionState::SetInstName(int NameID,
+ const std::string &NameStr,
+ LocTy NameLoc, Instruction *Inst) {
+ // If this instruction has void type, it cannot have a name or ID specified.
+ if (Inst->getType()->isVoidTy()) {
+ if (NameID != -1 || !NameStr.empty())
+ return P.Error(NameLoc, "instructions returning void cannot have a name");
+ return false;
+ }
+
+ // If this was a numbered instruction, verify that the instruction is the
+ // expected value and resolve any forward references.
+ if (NameStr.empty()) {
+ // If neither a name nor an ID was specified, just use the next ID.
+ if (NameID == -1)
+ NameID = NumberedVals.size();
+
+ if (unsigned(NameID) != NumberedVals.size())
+ return P.Error(NameLoc, "instruction expected to be numbered '%" +
+ Twine(NumberedVals.size()) + "'");
+
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
+ ForwardRefValIDs.find(NameID);
+ if (FI != ForwardRefValIDs.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefValIDs.erase(FI);
+ }
+
+ NumberedVals.push_back(Inst);
+ return false;
+ }
+
+ // Otherwise, the instruction had a name. Resolve forward refs and set it.
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ FI = ForwardRefVals.find(NameStr);
+ if (FI != ForwardRefVals.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ getTypeString(FI->second.first->getType()) + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ delete FI->second.first;
+ ForwardRefVals.erase(FI);
+ }
+
+ // Set the name on the instruction.
+ Inst->setName(NameStr);
+
+ if (Inst->getName() != NameStr)
+ return P.Error(NameLoc, "multiple definition of local value named '" +
+ NameStr + "'");
+ return false;
+}
+
+/// GetBB - Get a basic block with the specified name or ID, creating a
+/// forward reference record if needed.
+BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+ LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
+}
+
+/// DefineBB - Define the specified basic block, which is either named or
+/// unnamed. If there is an error, this returns null; otherwise it returns
+/// the block being defined.
+BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+ LocTy Loc) {
+ BasicBlock *BB;
+ if (Name.empty())
+ BB = GetBB(NumberedVals.size(), Loc);
+ else
+ BB = GetBB(Name, Loc);
+ if (BB == 0) return 0; // Already diagnosed error.
+
+ // Move the block to the end of the function. Forward ref'd blocks are
+ // inserted wherever they happen to be referenced.
+ F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
+
+ // Remove the block from forward ref sets.
+ if (Name.empty()) {
+ ForwardRefValIDs.erase(NumberedVals.size());
+ NumberedVals.push_back(BB);
+ } else {
+ // BB forward references are already in the function symbol table.
+ ForwardRefVals.erase(Name);
+ }
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Constants.
+//===----------------------------------------------------------------------===//
+
+/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// type implied. For example, if we parse "4" we don't know what integer type
+/// it has. The value will later be combined with its type and checked for
+/// sanity. PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
+ ID.Loc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default: return TokError("expected value token");
+ case lltok::GlobalID: // @42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_GlobalID;
+ break;
+ case lltok::GlobalVar: // @foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_GlobalName;
+ break;
+ case lltok::LocalVarID: // %42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_LocalID;
+ break;
+ case lltok::LocalVar: // %foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_LocalName;
+ break;
+ case lltok::exclaim: // !42, !{...}, or !"foo"
+ return ParseMetadataValue(ID, PFS);
+ case lltok::APSInt:
+ ID.APSIntVal = Lex.getAPSIntVal();
+ ID.Kind = ValID::t_APSInt;
+ break;
+ case lltok::APFloat:
+ ID.APFloatVal = Lex.getAPFloatVal();
+ ID.Kind = ValID::t_APFloat;
+ break;
+ case lltok::kw_true:
+ ID.ConstantVal = ConstantInt::getTrue(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_false:
+ ID.ConstantVal = ConstantInt::getFalse(Context);
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_null: ID.Kind = ValID::t_Null; break;
+ case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+ case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+
+ case lltok::lbrace: {
+ // ValID ::= '{' ConstVector '}'
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rbrace, "expected end of struct constant"))
+ return true;
+
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ ID.UIntVal = Elts.size();
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.Kind = ValID::t_ConstantStruct;
+ return false;
+ }
+ case lltok::less: {
+ // ValID ::= '<' ConstVector '>' --> Vector.
+ // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
+ Lex.Lex();
+ bool isPackedStruct = EatIfPresent(lltok::lbrace);
+
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ (isPackedStruct &&
+ ParseToken(lltok::rbrace, "expected end of packed struct")) ||
+ ParseToken(lltok::greater, "expected end of constant"))
+ return true;
+
+ if (isPackedStruct) {
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.UIntVal = Elts.size();
+ ID.Kind = ValID::t_PackedConstantStruct;
+ return false;
+ }
+
+ if (Elts.empty())
+ return Error(ID.Loc, "constant vector must not be empty");
+
+ if (!Elts[0]->getType()->isIntegerTy() &&
+ !Elts[0]->getType()->isFloatingPointTy())
+ return Error(FirstEltLoc,
+ "vector elements must have integer or floating point type");
+
+ // Verify that all the vector elements have the same type.
+ for (unsigned i = 1, e = Elts.size(); i != e; ++i)
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "vector element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+
+ ID.ConstantVal = ConstantVector::get(Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::lsquare: { // Array Constant
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rsquare, "expected end of array constant"))
+ return true;
+
+ // Handle the empty array case.
+ if (Elts.empty()) {
+ // Use undef instead of an array because it's inconvenient to determine
+ // the element type at this point, there being no elements to examine.
+ ID.Kind = ValID::t_EmptyArray;
+ return false;
+ }
+
+ if (!Elts[0]->getType()->isFirstClassType())
+ return Error(FirstEltLoc, "invalid array element type: " +
+ getTypeString(Elts[0]->getType()));
+
+ ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
+ // Verify all elements are correct type!
+ for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
+ if (Elts[i]->getType() != Elts[0]->getType())
+ return Error(FirstEltLoc,
+ "array element #" + Twine(i) +
+ " is not of type '" + getTypeString(Elts[0]->getType()));
+ }
+
+ ID.ConstantVal = ConstantArray::get(ATy, Elts);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_c: // c "foo"
+ Lex.Lex();
+ ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
+ if (ParseToken(lltok::StringConstant, "expected string")) return true;
+ ID.Kind = ValID::t_Constant;
+ return false;
+
+ case lltok::kw_asm: {
+ // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT
+ bool HasSideEffect, AlignStack;
+ Lex.Lex();
+ if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ ParseOptionalToken(lltok::kw_alignstack, AlignStack) ||
+ ParseStringConstant(ID.StrVal) ||
+ ParseToken(lltok::comma, "expected comma in inline asm expression") ||
+ ParseToken(lltok::StringConstant, "expected constraint string"))
+ return true;
+ ID.StrVal2 = Lex.getStrVal();
+ ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1);
+ ID.Kind = ValID::t_InlineAsm;
+ return false;
+ }
+
+ case lltok::kw_blockaddress: {
+ // ValID ::= 'blockaddress' '(' @foo ',' %bar ')'
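+ // For illustration (assumed .ll syntax, not quoted from this file):
+ //   i8* blockaddress(@func, %target)
+ // The referenced block may live in a function that has not been parsed yet,
+ // which is why a placeholder global is created below.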
+ Lex.Lex();
+
+ ValID Fn, Label;
+ LocTy FnLoc, LabelLoc;
+
+ if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
+ ParseValID(Fn) ||
+ ParseToken(lltok::comma, "expected comma in block address expression")||
+ ParseValID(Label) ||
+ ParseToken(lltok::rparen, "expected ')' in block address expression"))
+ return true;
+
+ if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+ return Error(Fn.Loc, "expected function name in blockaddress");
+ if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
+ return Error(Label.Loc, "expected basic block name in blockaddress");
+
+ // Make a global variable as a placeholder for this reference.
+ GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
+ ID.ConstantVal = FwdRef;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: {
+ unsigned Opc = Lex.getUIntVal();
+ Type *DestTy = 0;
+ Constant *SrcVal;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+ ParseGlobalTypeAndValue(SrcVal) ||
+ ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
+ ParseType(DestTy) ||
+ ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+ return true;
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
+ return Error(ID.Loc, "invalid cast opcode for cast from '" +
+ getTypeString(SrcVal->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
+ SrcVal, DestTy);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_extractvalue: {
+ Lex.Lex();
+ Constant *Val;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(ID.Loc, "extractvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for extractvalue");
+ ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_insertvalue: {
+ Lex.Lex();
+ Constant *Val0, *Val1;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+ return true;
+ if (!Val0->getType()->isAggregateType())
+ return Error(ID.Loc, "insertvalue operand must be aggregate type");
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(ID.Loc, "invalid indices for insertvalue");
+ ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: {
+ unsigned PredVal, Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseCmpPredicate(PredVal, Opc) ||
+ ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+ return true;
+
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "compare operands must have the same type");
+
+ CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
+ if (Opc == Instruction::FCmp) {
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "fcmp requires floating point operands");
+ ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
+ if (!Val0->getType()->isIntOrIntVectorTy() &&
+ !Val0->getType()->isPointerTy())
+ return Error(ID.Loc, "icmp requires pointer or integer operands");
+ ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+ }
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_fadd:
+ case lltok::kw_sub:
+ case lltok::kw_fsub:
+ case lltok::kw_mul:
+ case lltok::kw_fmul:
+ case lltok::kw_udiv:
+ case lltok::kw_sdiv:
+ case lltok::kw_fdiv:
+ case lltok::kw_urem:
+ case lltok::kw_srem:
+ case lltok::kw_frem:
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool NUW = false;
+ bool NSW = false;
+ bool Exact = false;
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ LocTy ModifierLoc = Lex.getLoc();
+ if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul || Opc == Instruction::Shl) {
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ if (EatIfPresent(lltok::kw_nsw)) {
+ NSW = true;
+ if (EatIfPresent(lltok::kw_nuw))
+ NUW = true;
+ }
+ } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr || Opc == Instruction::AShr) {
+ if (EatIfPresent(lltok::kw_exact))
+ Exact = true;
+ }
+ if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy()) {
+ if (NUW)
+ return Error(ModifierLoc, "nuw only applies to integer operations");
+ if (NSW)
+ return Error(ModifierLoc, "nsw only applies to integer operations");
+ }
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc, "constexpr requires integer operands");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ if (!Val0->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown binary operator!");
+ }
+ unsigned Flags = 0;
+ if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Exact) Flags |= PossiblyExactOperator::IsExact;
+ Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Logical Operations
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVectorTy())
+ return Error(ID.Loc,
+ "constexpr requires integer or integer vector operands");
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_getelementptr:
+ case lltok::kw_shufflevector:
+ case lltok::kw_insertelement:
+ case lltok::kw_extractelement:
+ case lltok::kw_select: {
+ unsigned Opc = Lex.getUIntVal();
+ SmallVector<Constant*, 16> Elts;
+ bool InBounds = false;
+ Lex.Lex();
+ if (Opc == Instruction::GetElementPtr)
+ InBounds = EatIfPresent(lltok::kw_inbounds);
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
+ ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+ return true;
+
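+ // For illustration (assumed .ll syntax, not quoted from this file), a
+ // getelementptr constant expression looks like
+ //   getelementptr inbounds ([10 x i32]* @arr, i64 0, i64 3)
+ // where Elts[0] is the pointer operand and the remaining elements are the
+ // indices.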
+ if (Opc == Instruction::GetElementPtr) {
+ if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ return Error(ID.Loc, "getelementptr requires pointer operand");
+
+ if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
+ (Value**)(Elts.data() + 1),
+ Elts.size() - 1))
+ return Error(ID.Loc, "invalid indices for getelementptr");
+ ID.ConstantVal = InBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(Elts[0],
+ Elts.data() + 1,
+ Elts.size() - 1) :
+ ConstantExpr::getGetElementPtr(Elts[0],
+ Elts.data() + 1, Elts.size() - 1);
+ } else if (Opc == Instruction::Select) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to select");
+ if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
+ Elts[2]))
+ return Error(ID.Loc, Reason);
+ ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
+ } else if (Opc == Instruction::ShuffleVector) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to shufflevector");
+ if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid operands to shufflevector");
+ ID.ConstantVal =
+ ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
+ } else if (Opc == Instruction::ExtractElement) {
+ if (Elts.size() != 2)
+ return Error(ID.Loc, "expected two operands to extractelement");
+ if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
+ return Error(ID.Loc, "invalid extractelement operands");
+ ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
+ } else {
+ assert(Opc == Instruction::InsertElement && "Unknown opcode");
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to insertelement");
+ if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid insertelement operands");
+ ID.ConstantVal =
+ ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
+ }
+
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseGlobalValue - Parse a global value with the specified type.
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+ C = 0;
+ ValID ID;
+ Value *V = NULL;
+ bool Parsed = ParseValID(ID) ||
+ ConvertValIDToValue(Ty, ID, V, NULL);
+ if (V && !(C = dyn_cast<Constant>(V)))
+ return Error(ID.Loc, "global values must be constants");
+ return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseGlobalValue(Ty, V);
+}
+
+/// ParseGlobalValueVector
+/// ::= /*empty*/
+/// ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+ // Empty list.
+ if (Lex.getKind() == lltok::rbrace ||
+ Lex.getKind() == lltok::rsquare ||
+ Lex.getKind() == lltok::greater ||
+ Lex.getKind() == lltok::rparen)
+ return false;
+
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+ }
+
+ return false;
+}
+
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts);
+ ID.Kind = ValID::t_MDNode;
+ return false;
+}
+
+/// ParseMetadataValue
+/// ::= !42
+/// ::= !{...}
+/// ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+
+ // MDNode:
+ // !{ ... }
+ if (Lex.getKind() == lltok::lbrace)
+ return ParseMetadataListValue(ID, PFS);
+
+ // Standalone metadata reference
+ // !42
+ if (Lex.getKind() == lltok::APSInt) {
+ if (ParseMDNodeID(ID.MDNodeVal)) return true;
+ ID.Kind = ValID::t_MDNode;
+ return false;
+ }
+
+ // MDString:
+ // ::= '!' STRINGCONSTANT
+ if (ParseMDString(ID.MDStringVal)) return true;
+ ID.Kind = ValID::t_MDString;
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS) {
+ if (Ty->isFunctionTy())
+ return Error(ID.Loc, "functions are not values, refer to them as pointers");
+
+ switch (ID.Kind) {
+ default: llvm_unreachable("Unknown ValID!");
+ case ValID::t_LocalID:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_LocalName:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_InlineAsm: {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ const FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ return Error(ID.Loc, "invalid type for inline asm constraint string");
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
+ return false;
+ }
+ case ValID::t_MDNode:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDNodeVal;
+ return false;
+ case ValID::t_MDString:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDStringVal;
+ return false;
+ case ValID::t_GlobalName:
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_GlobalID:
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_APSInt:
+ if (!Ty->isIntegerTy())
+ return Error(ID.Loc, "integer constant must have integer type");
+ ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+ V = ConstantInt::get(Context, ID.APSIntVal);
+ return false;
+ case ValID::t_APFloat:
+ if (!Ty->isFloatingPointTy() ||
+ !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
+ return Error(ID.Loc, "floating point constant invalid for type");
+
+ // The lexer has no type info, so builds all float and double FP constants
+ // as double. Fix this here. Long double does not need this.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
+ Ty->isFloatTy()) {
+ bool Ignored;
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ }
+ V = ConstantFP::get(Context, ID.APFloatVal);
+
+ if (V->getType() != Ty)
+ return Error(ID.Loc, "floating point constant does not have type '" +
+ getTypeString(Ty) + "'");
+
+ return false;
+ case ValID::t_Null:
+ if (!Ty->isPointerTy())
+ return Error(ID.Loc, "null must be a pointer type");
+ V = ConstantPointerNull::get(cast<PointerType>(Ty));
+ return false;
+ case ValID::t_Undef:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for undef constant");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_EmptyArray:
+ if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
+ return Error(ID.Loc, "invalid empty array initializer");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_Zero:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
+ return Error(ID.Loc, "invalid type for null constant");
+ V = Constant::getNullValue(Ty);
+ return false;
+ case ValID::t_Constant:
+ if (ID.ConstantVal->getType() != Ty)
+ return Error(ID.Loc, "constant expression type mismatch");
+
+ V = ID.ConstantVal;
+ return false;
+ case ValID::t_ConstantStruct:
+ case ValID::t_PackedConstantStruct:
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (ST->getNumElements() != ID.UIntVal)
+ return Error(ID.Loc,
+ "initializer with struct type has wrong # elements");
+ if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
+ return Error(ID.Loc, "packed'ness of initializer and type don't match");
+
+      // Verify that the elements are compatible with the struct type.
+ for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
+ if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
+ return Error(ID.Loc, "element " + Twine(i) +
+ " of struct initializer doesn't match struct element type");
+
+ V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts,
+ ID.UIntVal));
+ } else
+ return Error(ID.Loc, "constant expression type mismatch");
+ return false;
+ }
+}
+
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) {
+ V = 0;
+ ValID ID;
+ return ParseValID(ID, PFS) ||
+ ConvertValIDToValue(Ty, ID, V, PFS);
+}
+
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseValue(Ty, V, PFS);
+}
+
+bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Value *V;
+ Loc = Lex.getLoc();
+ if (ParseTypeAndValue(V, PFS)) return true;
+ if (!isa<BasicBlock>(V))
+ return Error(Loc, "expected a basic block");
+ BB = cast<BasicBlock>(V);
+ return false;
+}
+
+
+/// FunctionHeader
+/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
+/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+/// OptionalAlign OptGC
+bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
+ // Parse the linkage.
+ LocTy LinkageLoc = Lex.getLoc();
+ unsigned Linkage;
+
+ unsigned Visibility, RetAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+ return true;
+
+ // Verify that the linkage is ok.
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::ExternalLinkage:
+ break; // always ok.
+ case GlobalValue::DLLImportLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ if (isDefine)
+ return Error(LinkageLoc, "invalid linkage for function definition");
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::DLLExportLinkage:
+ if (!isDefine)
+ return Error(LinkageLoc, "invalid linkage for function declaration");
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::CommonLinkage:
+ return Error(LinkageLoc, "invalid function linkage type");
+ }
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "invalid function return type");
+
+ LocTy NameLoc = Lex.getLoc();
+
+ std::string FunctionName;
+ if (Lex.getKind() == lltok::GlobalVar) {
+ FunctionName = Lex.getStrVal();
+ } else if (Lex.getKind() == lltok::GlobalID) { // @42 is ok.
+ unsigned NameID = Lex.getUIntVal();
+
+ if (NameID != NumberedVals.size())
+      return TokError("function expected to be numbered '@" +
+ Twine(NumberedVals.size()) + "'");
+ } else {
+ return TokError("expected function name");
+ }
+
+ Lex.Lex();
+
+ if (Lex.getKind() != lltok::lparen)
+ return TokError("expected '(' in function argument list");
+
+ SmallVector<ArgInfo, 8> ArgList;
+ bool isVarArg;
+ unsigned FuncAttrs;
+ std::string Section;
+ unsigned Alignment;
+ std::string GC;
+ bool UnnamedAddr;
+ LocTy UnnamedAddrLoc;
+
+ if (ParseArgumentList(ArgList, isVarArg) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
+ ParseOptionalAttrs(FuncAttrs, 2) ||
+ (EatIfPresent(lltok::kw_section) &&
+ ParseStringConstant(Section)) ||
+ ParseOptionalAlignment(Alignment) ||
+ (EatIfPresent(lltok::kw_gc) &&
+ ParseStringConstant(GC)))
+ return true;
+
+ // If the alignment was parsed as an attribute, move to the alignment field.
+ if (FuncAttrs & Attribute::Alignment) {
+ Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
+ FuncAttrs &= ~Attribute::Alignment;
+ }
+
+ // Okay, if we got here, the function is syntactically valid. Convert types
+ // and do semantic checks.
+ std::vector<Type*> ParamTypeList;
+ SmallVector<AttributeWithIndex, 8> Attrs;
+
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ ParamTypeList.push_back(ArgList[i].Ty);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (FuncAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
+
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
+ return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
+ const FunctionType *FT =
+ FunctionType::get(RetType, ParamTypeList, isVarArg);
+ const PointerType *PFT = PointerType::getUnqual(FT);
+
+ Fn = 0;
+ if (!FunctionName.empty()) {
+ // If this was a definition of a forward reference, remove the definition
+ // from the forward reference table and fill in the forward ref.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
+ ForwardRefVals.find(FunctionName);
+ if (FRVI != ForwardRefVals.end()) {
+ Fn = M->getFunction(FunctionName);
+ if (Fn->getType() != PFT)
+ return Error(FRVI->second.second, "invalid forward reference to "
+ "function '" + FunctionName + "' with wrong type!");
+
+ ForwardRefVals.erase(FRVI);
+ } else if ((Fn = M->getFunction(FunctionName))) {
+ // Reject redefinitions.
+ return Error(NameLoc, "invalid redefinition of function '" +
+ FunctionName + "'");
+ } else if (M->getNamedValue(FunctionName)) {
+ return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
+ }
+
+ } else {
+ // If this is a definition of a forward referenced function, make sure the
+ // types agree.
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
+ = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ Fn = cast<Function>(I->second.first);
+ if (Fn->getType() != PFT)
+ return Error(NameLoc, "type of definition and forward reference of '@" +
+ Twine(NumberedVals.size()) + "' disagree");
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (Fn == 0)
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ else // Move the forward-reference to the correct spot in the module.
+ M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+
+ if (FunctionName.empty())
+ NumberedVals.push_back(Fn);
+
+ Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ Fn->setCallingConv(CC);
+ Fn->setAttributes(PAL);
+ Fn->setUnnamedAddr(UnnamedAddr);
+ Fn->setAlignment(Alignment);
+ Fn->setSection(Section);
+ if (!GC.empty()) Fn->setGC(GC.c_str());
+
+ // Add all of the arguments we parsed to the function.
+ Function::arg_iterator ArgIt = Fn->arg_begin();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+ // If the argument has a name, insert it into the argument symbol table.
+ if (ArgList[i].Name.empty()) continue;
+
+ // Set the name, if it conflicted, it will be auto-renamed.
+ ArgIt->setName(ArgList[i].Name);
+
+ if (ArgIt->getName() != ArgList[i].Name)
+ return Error(ArgList[i].Loc, "redefinition of argument '%" +
+ ArgList[i].Name + "'");
+ }
+
+ return false;
+}
+
+
+/// ParseFunctionBody
+/// ::= '{' BasicBlock+ '}'
+///
+bool LLParser::ParseFunctionBody(Function &Fn) {
+ if (Lex.getKind() != lltok::lbrace)
+ return TokError("expected '{' in function body");
+ Lex.Lex(); // eat the {.
+
+ int FunctionNumber = -1;
+ if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
+
+ PerFunctionState PFS(*this, Fn, FunctionNumber);
+
+ // We need at least one basic block.
+ if (Lex.getKind() == lltok::rbrace)
+ return TokError("function body requires at least one basic block");
+
+ while (Lex.getKind() != lltok::rbrace)
+ if (ParseBasicBlock(PFS)) return true;
+
+ // Eat the }.
+ Lex.Lex();
+
+ // Verify function is ok.
+ return PFS.FinishFunction();
+}
+
+/// ParseBasicBlock
+/// ::= LabelStr? Instruction*
+bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+ // If this basic block starts out with a name, remember it.
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+ if (Lex.getKind() == lltok::LabelStr) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+ if (BB == 0) return true;
+
+ std::string NameStr;
+
+ // Parse the instructions in this block until we get a terminator.
+ Instruction *Inst;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MetadataOnInst;
+ do {
+    // There are three possibilities for naming this instruction: a) no name
+    // specified, b) a name specified as "%foo =", c) a number specified as "%4 =".
+ LocTy NameLoc = Lex.getLoc();
+ int NameID = -1;
+ NameStr = "";
+
+ if (Lex.getKind() == lltok::LocalVarID) {
+ NameID = Lex.getUIntVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+ return true;
+ } else if (Lex.getKind() == lltok::LocalVar) {
+ NameStr = Lex.getStrVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+ return true;
+ }
+
+ switch (ParseInstruction(Inst, BB, PFS)) {
+ default: assert(0 && "Unknown ParseInstruction result!");
+ case InstError: return true;
+ case InstNormal:
+ BB->getInstList().push_back(Inst);
+
+ // With a normal result, we check to see if the instruction is followed by
+ // a comma and metadata.
+ if (EatIfPresent(lltok::comma))
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ case InstExtraComma:
+ BB->getInstList().push_back(Inst);
+
+ // If the instruction parser ate an extra comma at the end of it, it
+ // *must* be followed by metadata.
+ if (ParseInstructionMetadata(Inst, &PFS))
+ return true;
+ break;
+ }
+
+ // Set the name on the instruction.
+ if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+ } while (!isa<TerminatorInst>(Inst));
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseInstruction - Parse one of the many different instructions.
+///
+int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::Eof)
+ return TokError("found end of file when expecting more instructions");
+ LocTy Loc = Lex.getLoc();
+ unsigned KeywordVal = Lex.getUIntVal();
+ Lex.Lex(); // Eat the keyword.
+
+ switch (Token) {
+ default: return Error(Loc, "expected instruction opcode");
+ // Terminator Instructions.
+ case lltok::kw_unwind: Inst = new UnwindInst(Context); return false;
+ case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
+ case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
+ case lltok::kw_br: return ParseBr(Inst, PFS);
+ case lltok::kw_switch: return ParseSwitch(Inst, PFS);
+ case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
+ case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_sub:
+ case lltok::kw_mul:
+ case lltok::kw_shl: {
+ bool NUW = EatIfPresent(lltok::kw_nuw);
+ bool NSW = EatIfPresent(lltok::kw_nsw);
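+    // The wrap flags may appear in either order, so check for 'nuw' again in
+    // case 'nsw' came first.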
+ if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+ if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+ return false;
+ }
+ case lltok::kw_fadd:
+ case lltok::kw_fsub:
+ case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+
+ case lltok::kw_sdiv:
+ case lltok::kw_udiv:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool Exact = EatIfPresent(lltok::kw_exact);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+ return false;
+ }
+
+ case lltok::kw_urem:
+ case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ case lltok::kw_fdiv:
+ case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ // Casts.
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
+ // Other.
+ case lltok::kw_select: return ParseSelect(Inst, PFS);
+ case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
+ case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+ case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
+ case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
+ case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_call: return ParseCall(Inst, PFS, false);
+ case lltok::kw_tail: return ParseCall(Inst, PFS, true);
+ // Memory.
+ case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
+ case lltok::kw_load: return ParseLoad(Inst, PFS, false);
+ case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_volatile:
+ if (EatIfPresent(lltok::kw_load))
+ return ParseLoad(Inst, PFS, true);
+ else if (EatIfPresent(lltok::kw_store))
+ return ParseStore(Inst, PFS, true);
+ else
+ return TokError("expected 'load' or 'store'");
+ case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
+ case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
+ case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
+ }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+ if (Opc == Instruction::FCmp) {
+ switch (Lex.getKind()) {
+    default: return TokError("expected fcmp predicate (e.g. 'oeq')");
+ case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
+ case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
+ case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
+ case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break;
+ case lltok::kw_ole: P = CmpInst::FCMP_OLE; break;
+ case lltok::kw_oge: P = CmpInst::FCMP_OGE; break;
+ case lltok::kw_ord: P = CmpInst::FCMP_ORD; break;
+ case lltok::kw_uno: P = CmpInst::FCMP_UNO; break;
+ case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break;
+ case lltok::kw_une: P = CmpInst::FCMP_UNE; break;
+ case lltok::kw_ult: P = CmpInst::FCMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::FCMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::FCMP_UGE; break;
+ case lltok::kw_true: P = CmpInst::FCMP_TRUE; break;
+ case lltok::kw_false: P = CmpInst::FCMP_FALSE; break;
+ }
+ } else {
+ switch (Lex.getKind()) {
+    default: return TokError("expected icmp predicate (e.g. 'eq')");
+ case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
+ case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
+ case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
+ case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break;
+ case lltok::kw_sle: P = CmpInst::ICMP_SLE; break;
+ case lltok::kw_sge: P = CmpInst::ICMP_SGE; break;
+ case lltok::kw_ult: P = CmpInst::ICMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::ICMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::ICMP_UGE; break;
+ }
+ }
+ Lex.Lex();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Terminator Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseRet - Parse a return instruction.
+/// ::= 'ret' void (',' !dbg, !1)*
+/// ::= 'ret' TypeAndValue (',' !dbg, !1)*
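+///
+/// For example (operand names are illustrative):
+///     ret void
+///     ret i32 %retval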
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ SMLoc TypeLoc = Lex.getLoc();
+ Type *Ty = 0;
+ if (ParseType(Ty, true /*void allowed*/)) return true;
+
+ Type *ResType = PFS.getFunction().getReturnType();
+
+ if (Ty->isVoidTy()) {
+ if (!ResType->isVoidTy())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context);
+ return false;
+ }
+
+ Value *RV;
+ if (ParseValue(Ty, RV, PFS)) return true;
+
+ if (ResType != RV->getType())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
+ Inst = ReturnInst::Create(Context, RV);
+ return false;
+}
+
+
+/// ParseBr
+/// ::= 'br' TypeAndValue
+/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
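+///
+/// For example (condition and labels are illustrative):
+///     br label %dest
+///     br i1 %cond, label %iftrue, label %iffalse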
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc, Loc2;
+ Value *Op0;
+ BasicBlock *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+ Inst = BranchInst::Create(BB);
+ return false;
+ }
+
+ if (Op0->getType() != Type::getInt1Ty(Context))
+ return Error(Loc, "branch condition must have 'i1' type");
+
+ if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+ ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after true destination") ||
+ ParseTypeAndBasicBlock(Op2, Loc2, PFS))
+ return true;
+
+ Inst = BranchInst::Create(Op1, Op2, Op0);
+ return false;
+}
+
+/// ParseSwitch
+/// Instruction
+/// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+/// JumpTable
+/// ::= (TypeAndValue ',' TypeAndValue)*
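+///
+/// For example (values and labels are illustrative):
+///     switch i32 %val, label %otherwise [ i32 0, label %onzero
+///                                         i32 1, label %onone ]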
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CondLoc, BBLoc;
+ Value *Cond;
+ BasicBlock *DefaultBB;
+ if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after switch condition") ||
+ ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
+ ParseToken(lltok::lsquare, "expected '[' with switch table"))
+ return true;
+
+ if (!Cond->getType()->isIntegerTy())
+ return Error(CondLoc, "switch condition must have integer type");
+
+ // Parse the jump table pairs.
+ SmallPtrSet<Value*, 32> SeenCases;
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+ while (Lex.getKind() != lltok::rsquare) {
+ Value *Constant;
+ BasicBlock *DestBB;
+
+ if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after case value") ||
+ ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+
+ if (!SeenCases.insert(Constant))
+ return Error(CondLoc, "duplicate case value in switch");
+ if (!isa<ConstantInt>(Constant))
+ return Error(CondLoc, "case value is not a constant integer");
+
+ Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
+ }
+
+ Lex.Lex(); // Eat the ']'.
+
+ SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, Table.size());
+ for (unsigned i = 0, e = Table.size(); i != e; ++i)
+ SI->addCase(Table[i].first, Table[i].second);
+ Inst = SI;
+ return false;
+}
+
+/// ParseIndirectBr
+/// Instruction
+/// ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
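+///
+/// For example (names are illustrative):
+///     indirectbr i8* %Addr, [ label %bb1, label %bb2 ]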
+bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy AddrLoc;
+ Value *Address;
+ if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
+ ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+ return true;
+
+ if (!Address->getType()->isPointerTy())
+ return Error(AddrLoc, "indirectbr address must have pointer type");
+
+ // Parse the destination list.
+ SmallVector<BasicBlock*, 16> DestList;
+
+ if (Lex.getKind() != lltok::rsquare) {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ DestList.push_back(DestBB);
+ }
+ }
+
+ if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ return true;
+
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
+ for (unsigned i = 0, e = DestList.size(); i != e; ++i)
+ IBI->addDestination(DestList[i]);
+ Inst = IBI;
+ return false;
+}
+
+
+/// ParseInvoke
+/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
+/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
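+///
+/// For example (callee and labels are illustrative):
+///     invoke i32 @Callee(i32 %arg) to label %Cont unwind label %Cleanup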
+bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CallLoc = Lex.getLoc();
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+
+ BasicBlock *NormalBB, *UnwindBB;
+ if (ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2) ||
+ ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
+ ParseTypeAndBasicBlock(NormalBB, PFS) ||
+ ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+ ParseTypeAndBasicBlock(UnwindBB, PFS))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
+ II->setCallingConv(CC);
+ II->setAttributes(PAL);
+ Inst = II;
+ return false;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Binary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseArithmetic
+/// ::= ArithmeticOps TypeAndValue ',' Value
+///
+/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
+/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
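+///
+/// For example (operand names are illustrative), 'add i32 %a, %b' is parsed
+/// with OperandType 1 and 'fdiv double %x, %y' with OperandType 2.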
+bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: llvm_unreachable("Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
+ break;
+ case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
+ case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+/// ParseLogical
+///  ::= ArithmeticOps TypeAndValue ',' Value
+bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in logical operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (!LHS->getType()->isIntOrIntVectorTy())
+ return Error(Loc,"instruction requires integer or integer vector operands");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+
+/// ParseCompare
+/// ::= 'icmp' IPredicates TypeAndValue ',' Value
+/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
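+///
+/// For example (operand names are illustrative):
+///     icmp eq i32 %a, %b
+///     fcmp olt double %x, %y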
+bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ // Parse the integer/fp comparison predicate.
+ LocTy Loc;
+ unsigned Pred;
+ Value *LHS, *RHS;
+ if (ParseCmpPredicate(Pred, Opc) ||
+ ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after compare value") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (Opc == Instruction::FCmp) {
+ if (!LHS->getType()->isFPOrFPVectorTy())
+ return Error(Loc, "fcmp requires floating point operands");
+ Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else {
+ assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
+ if (!LHS->getType()->isIntOrIntVectorTy() &&
+ !LHS->getType()->isPointerTy())
+ return Error(Loc, "icmp requires integer operands");
+ Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+/// ::= CastOpc TypeAndValue 'to' Type
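+///
+/// For example (operand names are illustrative):
+///     trunc i32 %x to i8
+///     bitcast i8* %ptr to i32*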
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc;
+ Value *Op;
+ Type *DestTy = 0;
+ if (ParseTypeAndValue(Op, Loc, PFS) ||
+ ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+ ParseType(DestTy))
+ return true;
+
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
+ return Error(Loc, "invalid cast opcode for cast from '" +
+ getTypeString(Op->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
+ }
+ Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+ return false;
+}
+
+/// ParseSelect
+/// ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
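+///
+/// For example (names are illustrative):
+///     select i1 %cond, i32 %a, i32 %b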
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select condition") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+ return Error(Loc, Reason);
+
+ Inst = SelectInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseVA_Arg
+/// ::= 'va_arg' TypeAndValue ',' Type
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Op;
+ Type *EltTy = 0;
+ LocTy TypeLoc;
+ if (ParseTypeAndValue(Op, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+ ParseType(EltTy, TypeLoc))
+ return true;
+
+ if (!EltTy->isFirstClassType())
+ return Error(TypeLoc, "va_arg requires operand with first class type");
+
+ Inst = new VAArgInst(Op, EltTy);
+ return false;
+}
+
+/// ParseExtractElement
+/// ::= 'extractelement' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after extract value") ||
+ ParseTypeAndValue(Op1, PFS))
+ return true;
+
+ if (!ExtractElementInst::isValidOperands(Op0, Op1))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = ExtractElementInst::Create(Op0, Op1);
+ return false;
+}
+
+/// ParseInsertElement
+/// ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid insertelement operands");
+
+ Inst = InsertElementInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseShuffleVector
+/// ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid shufflevector operands");
+
+ Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParsePHI
+/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
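+///
+/// For example (values and labels are illustrative):
+///     phi i32 [ %a, %entry ], [ %b, %loop ]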
+int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+ Type *Ty = 0; LocTy TypeLoc;
+ Value *Op0, *Op1;
+
+ if (ParseType(Ty, TypeLoc) ||
+ ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after phi value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+
+ bool AteExtraComma = false;
+ SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+ while (1) {
+ PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+ if (!EatIfPresent(lltok::comma))
+ break;
+
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+
+ if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after phi value") ||
+ ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+ }
+
+ if (!Ty->isFirstClassType())
+ return Error(TypeLoc, "phi node must have first class type");
+
+ PHINode *PN = PHINode::Create(Ty, PHIVals.size());
+ for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+ PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+ Inst = PN;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseCall
+/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
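+///
+/// For example (callee and arguments are illustrative):
+///     call i32 @Callee(i32 %x, i8* %p)
+///     tail call fastcc void @Helper()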
+bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+ bool isTail) {
+ unsigned RetAttrs, FnAttrs;
+ CallingConv::ID CC;
+ Type *RetType = 0;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+ LocTy CallLoc = Lex.getLoc();
+
+ if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ CallInst *CI = CallInst::Create(Callee, Args);
+ CI->setTailCall(isTail);
+ CI->setCallingConv(CC);
+ CI->setAttributes(PAL);
+ Inst = CI;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseAlloc
+/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
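+///
+/// For example (names are illustrative):
+///     alloca i32
+///     alloca i32, i32 %count, align 4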
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Size = 0;
+ LocTy SizeLoc;
+ unsigned Alignment = 0;
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+
+ bool AteExtraComma = false;
+ if (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::kw_align) {
+ if (ParseOptionalAlignment(Alignment)) return true;
+ } else if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ } else {
+ if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+ }
+ }
+
+ if (Size && !Size->getType()->isIntegerTy())
+ return Error(SizeLoc, "element count must have integer type");
+
+ Inst = new AllocaInst(Ty, Size, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseLoad
+/// ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
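+///
+/// For example (names are illustrative):
+///     load i32* %ptr, align 4
+///     volatile load i32* %ptr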
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val; LocTy Loc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isPointerTy() ||
+ !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ return Error(Loc, "load operand must be a pointer to a first class type");
+
+ Inst = new LoadInst(Val, "", isVolatile, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseStore
+/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
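+///
+/// For example (names are illustrative):
+///     store i32 %val, i32* %ptr, align 4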
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val, *Ptr; LocTy Loc, PtrLoc;
+ unsigned Alignment = 0;
+ bool AteExtraComma = false;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after store operand") ||
+ ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ return true;
+
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "store operand must be a pointer");
+ if (!Val->getType()->isFirstClassType())
+ return Error(Loc, "store operand must be a first class value");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(Loc, "stored value and pointer type do not match");
+
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseGetElementPtr
+/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
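+///
+/// For example (names are illustrative):
+///     getelementptr inbounds [10 x i32]* %arr, i32 0, i32 %idx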
+int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Val; LocTy Loc, EltLoc;
+
+ bool InBounds = EatIfPresent(lltok::kw_inbounds);
+
+ if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+
+ if (!Ptr->getType()->isPointerTy())
+ return Error(Loc, "base of getelementptr must be a pointer");
+
+ SmallVector<Value*, 16> Indices;
+ bool AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ break;
+ }
+ if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+ if (!Val->getType()->isIntegerTy())
+ return Error(EltLoc, "getelementptr index must be an integer");
+ Indices.push_back(Val);
+ }
+
+ if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
+ Indices.begin(), Indices.end()))
+ return Error(Loc, "invalid getelementptr indices");
+ Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+ if (InBounds)
+ cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseExtractValue
+/// ::= 'extractvalue' TypeAndValue (',' uint32)+
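+///
+/// For example (names are illustrative):
+///     extractvalue { i32, float } %agg, 0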
+int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val->getType()->isAggregateType())
+ return Error(Loc, "extractvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
+ return Error(Loc, "invalid indices for extractvalue");
+ Inst = ExtractValueInst::Create(Val, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseInsertValue
+/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
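+///
+/// For example (names are illustrative):
+///     insertvalue { i32, float } %agg, i32 42, 0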
+int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val0, *Val1; LocTy Loc0, Loc1;
+ SmallVector<unsigned, 4> Indices;
+ bool AteExtraComma;
+ if (ParseTypeAndValue(Val0, Loc0, PFS) ||
+ ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
+ ParseTypeAndValue(Val1, Loc1, PFS) ||
+ ParseIndexList(Indices, AteExtraComma))
+ return true;
+
+ if (!Val0->getType()->isAggregateType())
+ return Error(Loc0, "insertvalue operand must be aggregate type");
+
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ return Error(Loc0, "invalid indices for insertvalue");
+ Inst = InsertValueInst::Create(Val0, Val1, Indices);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+//===----------------------------------------------------------------------===//
+// Embedded metadata.
+//===----------------------------------------------------------------------===//
+
+/// ParseMDNodeVector
+/// ::= Element (',' Element)*
+/// Element
+/// ::= 'null' | TypeAndValue
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
+ PerFunctionState *PFS) {
+ // Check for an empty list.
+ if (Lex.getKind() == lltok::rbrace)
+ return false;
+
+ do {
+ // Null is a special case since it is typeless.
+ if (EatIfPresent(lltok::kw_null)) {
+ Elts.push_back(0);
+ continue;
+ }
+
+ Value *V = 0;
+ if (ParseTypeAndValue(V, PFS)) return true;
+ Elts.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
new file mode 100644
index 000000000000..963065785061
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -0,0 +1,373 @@
+//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
+
+#include "LLLexer.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include <map>
+
+namespace llvm {
+ class Module;
+ class OpaqueType;
+ class Function;
+ class Value;
+ class BasicBlock;
+ class Instruction;
+ class Constant;
+ class GlobalValue;
+ class MDString;
+ class MDNode;
+ class StructType;
+
+  /// ValID - Represents a reference to a definition of some sort with no type.
+ /// There are several cases where we have to parse the value but where the
+ /// type can depend on later context. This may either be a numeric reference
+ /// or a symbolic (%var) reference. This is just a discriminated union.
+ struct ValID {
+ enum {
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
+ t_MDNode, // Value in MDNodeVal.
+ t_MDString, // Value in MDStringVal.
+ t_ConstantStruct, // Value in ConstantStructElts.
+ t_PackedConstantStruct // Value in ConstantStructElts.
+ } Kind;
+
+ LLLexer::LocTy Loc;
+ unsigned UIntVal;
+ std::string StrVal, StrVal2;
+ APSInt APSIntVal;
+ APFloat APFloatVal;
+ Constant *ConstantVal;
+ MDNode *MDNodeVal;
+ MDString *MDStringVal;
+ Constant **ConstantStructElts;
+
+ ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
+ ~ValID() {
+ if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
+ delete [] ConstantStructElts;
+ }
+
+ bool operator<(const ValID &RHS) const {
+ if (Kind == t_LocalID || Kind == t_GlobalID)
+ return UIntVal < RHS.UIntVal;
+ assert((Kind == t_LocalName || Kind == t_GlobalName ||
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
+ "Ordering not defined for this ValID kind yet");
+ return StrVal < RHS.StrVal;
+ }
+ };
+
+ class LLParser {
+ public:
+ typedef LLLexer::LocTy LocTy;
+ private:
+ LLVMContext &Context;
+ LLLexer Lex;
+ Module *M;
+
+ // Instruction metadata resolution. Each instruction can have a list of
+    // MDRef info associated with it.
+ //
+ // The simpler approach of just creating temporary MDNodes and then calling
+ // RAUW on them when the definition is processed doesn't work because some
+ // instruction metadata kinds, such as dbg, get stored in the IR in an
+ // "optimized" format which doesn't participate in the normal value use
+ // lists. This means that RAUW doesn't work, even on temporary MDNodes
+ // which otherwise support RAUW. Instead, we defer resolving MDNode
+ // references until the definitions have been processed.
+ struct MDRef {
+ SMLoc Loc;
+ unsigned MDKind, MDSlot;
+ };
+ DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
+
+ // Type resolution handling data structures. The location is set when we
+ // have processed a use of the type but not a definition yet.
+ StringMap<std::pair<Type*, LocTy> > NamedTypes;
+ std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+
+ std::vector<TrackingVH<MDNode> > NumberedMetadata;
+ std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
+
+ // Global Value reference information.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
+ std::vector<GlobalValue*> NumberedVals;
+
+ // References to blockaddress. The key is the function ValID, the value is
+ // a list of references to blocks in that function.
+ std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
+ ForwardRefBlockAddresses;
+
+ public:
+ LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
+ Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
+ M(m) {}
+ bool Run();
+
+ LLVMContext &getContext() { return Context; }
+
+ private:
+
+ bool Error(LocTy L, const Twine &Msg) const {
+ return Lex.Error(L, Msg);
+ }
+ bool TokError(const Twine &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+
+ /// GetGlobalVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ // Helper Routines.
+ bool ParseToken(lltok::Kind T, const char *ErrMsg);
+ bool EatIfPresent(lltok::Kind T) {
+ if (Lex.getKind() != T) return false;
+ Lex.Lex();
+ return true;
+ }
+ bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
+ if (Lex.getKind() != T) {
+ Present = false;
+ } else {
+ if (Loc)
+ *Loc = Lex.getLoc();
+ Lex.Lex();
+ Present = true;
+ }
+ return false;
+ }
+ bool ParseStringConstant(std::string &Result);
+ bool ParseUInt32(unsigned &Val);
+ bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+ Loc = Lex.getLoc();
+ return ParseUInt32(Val);
+ }
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+ bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
+ bool ParseOptionalLinkage(unsigned &Linkage) {
+ bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
+ }
+ bool ParseOptionalVisibility(unsigned &Visibility);
+ bool ParseOptionalCallingConv(CallingConv::ID &CC);
+ bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalStackAlignment(unsigned &Alignment);
+ bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
+ bool AteExtraComma;
+ if (ParseIndexList(Indices, AteExtraComma)) return true;
+ if (AteExtraComma)
+ return TokError("expected index");
+ return false;
+ }
+
+ // Top-Level Entities
+ bool ParseTopLevelEntities();
+ bool ValidateEndOfModule();
+ bool ParseTargetDefinition();
+ bool ParseDepLibs();
+ bool ParseModuleAsm();
+ bool ParseUnnamedType();
+ bool ParseNamedType();
+ bool ParseDeclare();
+ bool ParseDefine();
+
+ bool ParseGlobalType(bool &IsConstant);
+ bool ParseUnnamedGlobal();
+ bool ParseNamedGlobal();
+ bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
+ bool HasLinkage, unsigned Visibility);
+ bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
+ bool ParseStandaloneMetadata();
+ bool ParseNamedMetadata();
+ bool ParseMDString(MDString *&Result);
+ bool ParseMDNodeID(MDNode *&Result);
+ bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
+
+ // Type Parsing.
+ bool ParseType(Type *&Result, bool AllowVoid = false);
+ bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
+ Loc = Lex.getLoc();
+ return ParseType(Result, AllowVoid);
+ }
+ bool ParseAnonStructType(Type *&Result, bool Packed);
+ bool ParseStructBody(SmallVectorImpl<Type*> &Body);
+ bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy);
+
+ bool ParseArrayVectorType(Type *&Result, bool isVector);
+ bool ParseFunctionType(Type *&Result);
+
+ // Function Semantic Analysis.
+ class PerFunctionState {
+ LLParser &P;
+ Function &F;
+ std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
+ std::vector<Value*> NumberedVals;
+
+    /// FunctionNumber - If this is an unnamed function, this is its slot
+    /// number; otherwise it is -1.
+ int FunctionNumber;
+ public:
+ PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
+ ~PerFunctionState();
+
+ Function &getFunction() const { return F; }
+
+ bool FinishFunction();
+
+ /// GetVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
+ Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ /// SetInstName - After an instruction is parsed and inserted into its
+ /// basic block, this installs its name.
+ bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+ Instruction *Inst);
+
+ /// GetBB - Get a basic block with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// is not a BasicBlock.
+ BasicBlock *GetBB(const std::string &Name, LocTy Loc);
+ BasicBlock *GetBB(unsigned ID, LocTy Loc);
+
+ /// DefineBB - Define the specified basic block, which is either named or
+ /// unnamed. If there is an error, this returns null otherwise it returns
+ /// the block being defined.
+ BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+ };
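+  // Editor's note on PerFunctionState above (not in the upstream file): GetVal
+  // and GetBB may be called before the named entity exists, e.g. for
+  // "br label %later" where %later is defined further down the function. The
+  // placeholder recorded in ForwardRefVals/ForwardRefValIDs is patched when the
+  // real definition is parsed, and FinishFunction() reports anything left
+  // unresolved.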
+
+ bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS);
+
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+ return ParseValue(Ty, V, &PFS);
+ }
+ bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseValue(Ty, V, &PFS);
+ }
+
+ bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
+ bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ return ParseTypeAndValue(V, &PFS);
+ }
+ bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseTypeAndValue(V, PFS);
+ }
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+ PerFunctionState &PFS);
+ bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
+ LocTy Loc;
+ return ParseTypeAndBasicBlock(BB, Loc, PFS);
+ }
+
+
+ struct ParamInfo {
+ LocTy Loc;
+ Value *V;
+ unsigned Attrs;
+ ParamInfo(LocTy loc, Value *v, unsigned attrs)
+ : Loc(loc), V(v), Attrs(attrs) {}
+ };
+ bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS);
+
+ // Constant Parsing.
+ bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+ bool ParseGlobalValue(const Type *Ty, Constant *&V);
+ bool ParseGlobalTypeAndValue(Constant *&V);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
+ bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+ bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
+
+ // Function Parsing.
+ struct ArgInfo {
+ LocTy Loc;
+ Type *Ty;
+ unsigned Attrs;
+ std::string Name;
+ ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+ : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
+ };
+ bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
+ bool ParseFunctionHeader(Function *&Fn, bool isDefine);
+ bool ParseFunctionBody(Function &Fn);
+ bool ParseBasicBlock(PerFunctionState &PFS);
+
+ // Instruction Parsing. Each instruction parsing routine can return with a
+ // normal result, an error result, or return having eaten an extra comma.
+ enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
+ int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS);
+ bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
+
+ bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+
+ bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
+ bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
+ bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
+ bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
+ int ParsePHI(Instruction *&I, PerFunctionState &PFS);
+ bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
+ int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
+ int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
+ int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
+ std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+ PerFunctionState *PFS);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
new file mode 100644
index 000000000000..a5f89fcce0c0
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -0,0 +1,149 @@
+//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the enums for the .ll lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBS_ASMPARSER_LLTOKEN_H
+#define LIBS_ASMPARSER_LLTOKEN_H
+
+namespace llvm {
+namespace lltok {
+ enum Kind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ dotdotdot, // ...
+ equal, comma, // = ,
+ star, // *
+ lsquare, rsquare, // [ ]
+ lbrace, rbrace, // { }
+ less, greater, // < >
+ lparen, rparen, // ( )
+ backslash, // \ (not /)
+ exclaim, // !
+
+ kw_x,
+ kw_true, kw_false,
+ kw_declare, kw_define,
+ kw_global, kw_constant,
+
+ kw_private, kw_linker_private, kw_linker_private_weak,
+ kw_linker_private_weak_def_auto, kw_internal,
+ kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
+ kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
+ kw_default, kw_hidden, kw_protected,
+ kw_unnamed_addr,
+ kw_extern_weak,
+ kw_external, kw_thread_local,
+ kw_zeroinitializer,
+ kw_undef, kw_null,
+ kw_to,
+ kw_tail,
+ kw_target,
+ kw_triple,
+ kw_deplibs,
+ kw_datalayout,
+ kw_volatile,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
+ kw_inbounds,
+ kw_align,
+ kw_addrspace,
+ kw_section,
+ kw_alias,
+ kw_module,
+ kw_asm,
+ kw_sideeffect,
+ kw_alignstack,
+ kw_gc,
+ kw_c,
+
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
+ kw_msp430_intrcc,
+ kw_ptx_kernel, kw_ptx_device,
+
+ kw_signext,
+ kw_zeroext,
+ kw_inreg,
+ kw_sret,
+ kw_nounwind,
+ kw_noreturn,
+ kw_noalias,
+ kw_nocapture,
+ kw_byval,
+ kw_nest,
+ kw_readnone,
+ kw_readonly,
+ kw_uwtable,
+
+ kw_inlinehint,
+ kw_noinline,
+ kw_alwaysinline,
+ kw_optsize,
+ kw_ssp,
+ kw_sspreq,
+ kw_noredzone,
+ kw_noimplicitfloat,
+ kw_naked,
+ kw_hotpatch,
+ kw_nonlazybind,
+
+ kw_type,
+ kw_opaque,
+
+ kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
+ kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
+ kw_ueq, kw_une,
+
+ // Instruction Opcodes (Opcode in UIntVal).
+ kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
+ kw_udiv, kw_sdiv, kw_fdiv,
+ kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
+ kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp,
+
+ kw_phi, kw_call,
+ kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
+ kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
+ kw_select, kw_va_arg,
+
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
+ kw_unreachable,
+
+ kw_alloca, kw_load, kw_store, kw_getelementptr,
+
+ kw_extractelement, kw_insertelement, kw_shufflevector,
+ kw_extractvalue, kw_insertvalue, kw_blockaddress,
+
+ // Unsigned Valued tokens (UIntVal).
+ GlobalID, // @42
+ LocalVarID, // %42
+
+ // String valued tokens (StrVal).
+ LabelStr, // foo:
+ GlobalVar, // @foo @"foo"
+ LocalVar, // %foo %"foo"
+ MetadataVar, // !foo
+ StringConstant, // "foo"
+
+ // Type valued tokens (TyVal).
+ Type,
+
+ APFloat, // APFloatVal
+ APSInt // APSInt
+ };
+} // end namespace lltok
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
new file mode 100644
index 000000000000..59fb471f2b93
--- /dev/null
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -0,0 +1,62 @@
+//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssembly(MemoryBuffer *F,
+ Module *M,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(F, SMLoc());
+
+ // If we are parsing into an existing module, do it.
+ if (M)
+ return LLParser(F, SM, Err, M).Run() ? 0 : M;
+
+ // Otherwise create a new module.
+ OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context));
+ if (LLParser(F, SM, Err, M2.get()).Run())
+ return 0;
+ return M2.take();
+}
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseAssembly(File.take(), 0, Err, Context);
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+ SMDiagnostic &Err, LLVMContext &Context) {
+ MemoryBuffer *F =
+ MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)),
+ "<string>");
+
+ return ParseAssembly(F, M, Err, Context);
+}
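+
+// Editor's sketch (not part of the upstream file, kept out of the build): a
+// minimal use of ParseAssemblyString above. The function name ExampleParseIR
+// and the IR text are illustrative assumptions; Err.Print is how the
+// command-line tools of this era report a parse failure.
+#if 0
+static Module *ExampleParseIR(LLVMContext &Context) {
+  SMDiagnostic Err;
+  Module *M = ParseAssemblyString("define i32 @f() {\n"
+                                  "entry:\n"
+                                  "  ret i32 0\n"
+                                  "}\n",
+                                  0, Err, Context);
+  if (M == 0)
+    Err.Print("example", errs());  // print the diagnostic with caret info
+  return M;                        // caller owns the returned module
+}
+#endif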
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 000000000000..15844c0041c3
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,88 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+#include <cstring>
+
+using namespace llvm;
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+ reference to the module via the OutModule parameter. Returns 0 on success.
+ Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage) {
+ return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
+ OutMessage);
+}
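+
+/* Editor's sketch (not part of the upstream file, kept out of the build): how a
+   C client is expected to call LLVMParseBitcode above. The helper name is an
+   illustrative assumption; LLVMDisposeMessage is assumed to be visible through
+   the llvm-c headers pulled in by llvm-c/BitReader.h. */
+#if 0
+static LLVMModuleRef ExampleLoadBitcode(LLVMMemoryBufferRef MemBuf) {
+  LLVMModuleRef M;
+  char *Msg = 0;
+  if (LLVMParseBitcode(MemBuf, &M, &Msg)) {  /* nonzero means failure */
+    if (Msg)
+      LLVMDisposeMessage(Msg);               /* message was strdup'd above */
+    return 0;
+  }
+  return M;                                  /* dispose with LLVMDisposeModule */
+}
+#endif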
+
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutModule) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Builds a lazily-deserialized module from the bitcode in the given memory
+   buffer, returning a reference to it via the OutM parameter. Returns 0 on
+   success. Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutM) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+  return 0;
+}
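+
+/* Editor's sketch (not part of the upstream file, kept out of the build): lazy
+   loading into a caller-owned context with the function above; the helper name
+   is an illustrative assumption. */
+#if 0
+static LLVMModuleRef ExampleLazyLoad(LLVMContextRef Ctx,
+                                     LLVMMemoryBufferRef MemBuf) {
+  LLVMModuleRef M;
+  char *Msg = 0;
+  if (LLVMGetBitcodeModuleInContext(Ctx, MemBuf, &M, &Msg)) {
+    if (Msg)
+      LLVMDisposeMessage(Msg);
+    return 0;
+  }
+  /* Function bodies are deserialized on demand as they are first used. */
+  return M;
+}
+#endif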
+
+LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM,
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
+ reinterpret_cast<LLVMModuleRef*>(OutMP),
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
+ OutMP, OutMessage);
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
new file mode 100644
index 000000000000..24c29941cf16
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -0,0 +1,2824 @@
+//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "BitcodeReader.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/OperandTraits.h"
+using namespace llvm;
+
+void BitcodeReader::FreeState() {
+ if (BufferOwned)
+ delete Buffer;
+ Buffer = 0;
+ std::vector<Type*>().swap(TypeList);
+ ValueList.clear();
+ MDValueList.clear();
+
+ std::vector<AttrListPtr>().swap(MAttributes);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ DeferredFunctionInfo.clear();
+ MDKindMap.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions to implement forward reference resolution, etc.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToString - Convert a string from a record into an std::string, return
+/// true on failure.
+template<typename StrTy>
+static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
+ StrTy &Result) {
+ if (Idx > Record.size())
+ return true;
+
+ for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+ Result += (char)Record[i];
+ return false;
+}
+
+static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown/new linkages to external
+ case 0: return GlobalValue::ExternalLinkage;
+ case 1: return GlobalValue::WeakAnyLinkage;
+ case 2: return GlobalValue::AppendingLinkage;
+ case 3: return GlobalValue::InternalLinkage;
+ case 4: return GlobalValue::LinkOnceAnyLinkage;
+ case 5: return GlobalValue::DLLImportLinkage;
+ case 6: return GlobalValue::DLLExportLinkage;
+ case 7: return GlobalValue::ExternalWeakLinkage;
+ case 8: return GlobalValue::CommonLinkage;
+ case 9: return GlobalValue::PrivateLinkage;
+ case 10: return GlobalValue::WeakODRLinkage;
+ case 11: return GlobalValue::LinkOnceODRLinkage;
+ case 12: return GlobalValue::AvailableExternallyLinkage;
+ case 13: return GlobalValue::LinkerPrivateLinkage;
+ case 14: return GlobalValue::LinkerPrivateWeakLinkage;
+ case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ }
+}
+
+static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown visibilities to default.
+ case 0: return GlobalValue::DefaultVisibility;
+ case 1: return GlobalValue::HiddenVisibility;
+ case 2: return GlobalValue::ProtectedVisibility;
+ }
+}
+
+static int GetDecodedCastOpcode(unsigned Val) {
+ switch (Val) {
+ default: return -1;
+ case bitc::CAST_TRUNC : return Instruction::Trunc;
+ case bitc::CAST_ZEXT : return Instruction::ZExt;
+ case bitc::CAST_SEXT : return Instruction::SExt;
+ case bitc::CAST_FPTOUI : return Instruction::FPToUI;
+ case bitc::CAST_FPTOSI : return Instruction::FPToSI;
+ case bitc::CAST_UITOFP : return Instruction::UIToFP;
+ case bitc::CAST_SITOFP : return Instruction::SIToFP;
+ case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
+ case bitc::CAST_FPEXT : return Instruction::FPExt;
+ case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
+ case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
+ case bitc::CAST_BITCAST : return Instruction::BitCast;
+ }
+}
+static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
+ switch (Val) {
+ default: return -1;
+ case bitc::BINOP_ADD:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add;
+ case bitc::BINOP_SUB:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub;
+ case bitc::BINOP_MUL:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul;
+ case bitc::BINOP_UDIV: return Instruction::UDiv;
+ case bitc::BINOP_SDIV:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv;
+ case bitc::BINOP_UREM: return Instruction::URem;
+ case bitc::BINOP_SREM:
+ return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem;
+ case bitc::BINOP_SHL: return Instruction::Shl;
+ case bitc::BINOP_LSHR: return Instruction::LShr;
+ case bitc::BINOP_ASHR: return Instruction::AShr;
+ case bitc::BINOP_AND: return Instruction::And;
+ case bitc::BINOP_OR: return Instruction::Or;
+ case bitc::BINOP_XOR: return Instruction::Xor;
+ }
+}
+
+namespace llvm {
+namespace {
+  /// @brief A constant placeholder that stands in for a forward-referenced
+  /// constant until its actual definition is read.
+ class ConstantPlaceHolder : public ConstantExpr {
+ void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
+ public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
+ }
+
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ //static inline bool classof(const ConstantPlaceHolder *) { return true; }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
+
+
+ /// Provide fast operand accessors
+ //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ };
+}
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder> :
+ public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
+};
+}
+
+
+void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = ValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // Handle constants and non-constants (e.g. instrs) differently for
+ // efficiency.
+ if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+ ResolveConstants.push_back(std::make_pair(PHC, Idx));
+ OldV = V;
+ } else {
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ }
+}
+
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
+ const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert(Ty == V->getType() && "Type mismatch in constant table!");
+ return cast<Constant>(V);
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Constant *C = new ConstantPlaceHolder(Ty, Context);
+ ValuePtrs[Idx] = C;
+ return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+ return V;
+ }
+
+ // No type specified, must be invalid reference.
+ if (Ty == 0) return 0;
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Ty);
+ ValuePtrs[Idx] = V;
+ return V;
+}
+
+/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+/// resolves any forward references. The idea behind this is that we sometimes
+/// get constants (such as large arrays) which reference *many* forward ref
+/// constants. Replacing each of these causes a lot of thrashing when
+/// building/reuniquing the constant. Instead of doing this, we look at all the
+/// uses and rewrite all the place holders at once for any constant that uses
+/// a placeholder.
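+/// For example, a large ConstantArray whose elements are all forward references
+/// would otherwise be re-uniqued once per element as each placeholder resolves;
+/// with this scheme the array is rewritten a single time with every placeholder
+/// operand replaced at once.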
+void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+ // Sort the values by-pointer so that they are efficient to look up with a
+ // binary search.
+ std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+ SmallVector<Constant*, 64> NewOps;
+
+ while (!ResolveConstants.empty()) {
+ Value *RealVal = operator[](ResolveConstants.back().second);
+ Constant *Placeholder = ResolveConstants.back().first;
+ ResolveConstants.pop_back();
+
+ // Loop over all users of the placeholder, updating them to reference the
+ // new value. If they reference more than one placeholder, update them all
+ // at once.
+ while (!Placeholder->use_empty()) {
+ Value::use_iterator UI = Placeholder->use_begin();
+ User *U = *UI;
+
+ // If the using object isn't uniqued, just update the operands. This
+ // handles instructions and initializers for global variables.
+ if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
+ UI.getUse().set(RealVal);
+ continue;
+ }
+
+ // Otherwise, we have a constant that uses the placeholder. Replace that
+ // constant with a new constant that has *all* placeholder uses updated.
+ Constant *UserC = cast<Constant>(U);
+ for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
+ I != E; ++I) {
+ Value *NewOp;
+ if (!isa<ConstantPlaceHolder>(*I)) {
+ // Not a placeholder reference.
+ NewOp = *I;
+ } else if (*I == Placeholder) {
+ // Common case is that it just references this one placeholder.
+ NewOp = RealVal;
+ } else {
+ // Otherwise, look up the placeholder in ResolveConstants.
+ ResolveConstantsTy::iterator It =
+ std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+ std::pair<Constant*, unsigned>(cast<Constant>(*I),
+ 0));
+ assert(It != ResolveConstants.end() && It->first == *I);
+ NewOp = operator[](It->second);
+ }
+
+ NewOps.push_back(cast<Constant>(NewOp));
+ }
+
+ // Make the new constant.
+ Constant *NewC;
+ if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+ NewC = ConstantArray::get(UserCA->getType(), NewOps);
+ } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+ NewC = ConstantStruct::get(UserCS->getType(), NewOps);
+ } else if (isa<ConstantVector>(UserC)) {
+ NewC = ConstantVector::get(NewOps);
+ } else {
+ assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+ NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
+ }
+
+ UserC->replaceAllUsesWith(NewC);
+ UserC->destroyConstant();
+ NewOps.clear();
+ }
+
+ // Update all ValueHandles, they should be the only users at this point.
+ Placeholder->replaceAllUsesWith(RealVal);
+ delete Placeholder;
+ }
+}
+
+void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = MDValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // If there was a forward reference to this value, replace it.
+ MDNode *PrevVal = cast<MDNode>(OldV);
+ OldV->replaceAllUsesWith(V);
+ MDNode::deleteTemporary(PrevVal);
+  // Deleting PrevVal nulls out the entry at Idx in MDValuePtrs, so store the
+  // new value for Idx explicitly.
+ MDValuePtrs[Idx] = V;
+}
+
+Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = MDValuePtrs[Idx]) {
+ assert(V->getType()->isMetadataTy() && "Type mismatch in value table!");
+ return V;
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ MDValuePtrs[Idx] = V;
+ return V;
+}
+
+Type *BitcodeReader::getTypeByID(unsigned ID) {
+ // The type table size is always specified correctly.
+ if (ID >= TypeList.size())
+ return 0;
+
+ if (Type *Ty = TypeList[ID])
+ return Ty;
+
+ // If we have a forward reference, the only possible case is when it is to a
+ // named struct. Just create a placeholder for now.
+ return TypeList[ID] = StructType::createNamed(Context, "");
+}
+
+/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
+Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
+ if (ID >= TypeList.size())
+ TypeList.resize(ID+1);
+
+ return TypeList[ID];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing blocks from the bitcode file
+//===----------------------------------------------------------------------===//
+
+bool BitcodeReader::ParseAttributeBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributes.empty())
+ return Error("Multiple PARAMATTR blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ SmallVector<AttributeWithIndex, 8> Attrs;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...]
+ if (Record.size() & 1)
+ return Error("Invalid ENTRY record");
+
+ // FIXME : Remove this autoupgrade code in LLVM 3.0.
+      // If function attributes are stored at index 0, transfer them to index
+      // ~0. Index 0 is now used for return-value attributes, but it used to
+      // hold function attributes.
+ Attributes RetAttribute = Attribute::None;
+ Attributes FnAttribute = Attribute::None;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ // FIXME: remove in LLVM 3.0
+ // The alignment is stored as a 16-bit raw value from bits 31--16.
+ // We shift the bits above 31 down by 11 bits.
+
+ unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16;
+ if (Alignment && !isPowerOf2_32(Alignment))
+ return Error("Alignment is not a power of two.");
+
+ Attributes ReconstitutedAttr = Record[i+1] & 0xffff;
+ if (Alignment)
+ ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment);
+ ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11;
+ Record[i+1] = ReconstitutedAttr;
+
+ if (Record[i] == 0)
+ RetAttribute = Record[i+1];
+ else if (Record[i] == ~0U)
+ FnAttribute = Record[i+1];
+ }
+
+ unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
+ Attribute::ReadOnly|Attribute::ReadNone);
+
+ if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
+ (RetAttribute & OldRetAttrs) != 0) {
+ if (FnAttribute == Attribute::None) { // add a slot so they get added.
+ Record.push_back(~0U);
+ Record.push_back(0);
+ }
+
+ FnAttribute |= RetAttribute & OldRetAttrs;
+ RetAttribute &= ~OldRetAttrs;
+ }
+
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ if (Record[i] == 0) {
+ if (RetAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttribute));
+ } else if (Record[i] == ~0U) {
+ if (FnAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute));
+ } else if (Record[i+1] != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1]));
+ }
+
+ MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end()));
+ Attrs.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
+ return Error("Malformed block record");
+
+ return ParseTypeTableBody();
+}
+
+bool BitcodeReader::ParseTypeTableBody() {
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+ SmallVector<uint64_t, 64> Record;
+ unsigned NumRecords = 0;
+
+ SmallString<64> TypeName;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NumRecords != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK");
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ ResultTy = getTypeByID(Record[0]);
+ if (ResultTy == 0) return Error("invalid element type in pointer type");
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[2]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid type in struct type");
+ ResultTy = StructType::get(Context, EltTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
+ if (ConvertToString(Record, 0, TypeName))
+ return Error("Invalid STRUCT_NAME record");
+ continue;
+
+ case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid STRUCT type record");
+ Res->setBody(EltTys, Record[0]);
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
+ if (Record.size() != 1)
+ return Error("Invalid OPAQUE type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct with no body.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ else
+        return Error("Invalid VECTOR type element");
+ break;
+ }
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+ assert(ResultTy && "Didn't read a type?");
+ assert(TypeList[NumRecords] == 0 && "Already read type?");
+ TypeList[NumRecords++] = ResultTy;
+ }
+}
+
+// FIXME: Remove in LLVM 3.1
+bool BitcodeReader::ParseOldTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
+ return Error("Malformed block record");
+
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+
+ // While horrible, we have no good ordering of types in the bc file. Just
+ // iteratively parse types out of the bc file in multiple passes until we get
+ // them all. Do this by saving a cursor for the start of the type block.
+ BitstreamCursor StartOfTypeBlockCursor(Stream);
+
+ unsigned NumTypesRead = 0;
+
+ SmallVector<uint64_t, 64> Record;
+RestartScan:
+ unsigned NextTypeID = 0;
+ bool ReadAnyTypes = false;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NextTypeID != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
+
+ // If we haven't read all of the types yet, iterate again.
+ if (NumTypesRead != TypeList.size()) {
+ // If we didn't successfully read any types in this pass, then we must
+ // have an unhandled forward reference.
+ if (!ReadAnyTypes)
+ return Error("Obsolete bitcode contains unhandled recursive type");
+
+ Stream = StartOfTypeBlockCursor;
+ goto RestartScan;
+ }
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_OPAQUE: // OPAQUE
+ if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
+ ResultTy = StructType::createNamed(Context, "");
+ break;
+ case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
+ if (NextTypeID >= TypeList.size()) break;
+ // If we already read it, don't reprocess.
+ if (TypeList[NextTypeID] &&
+ !cast<StructType>(TypeList[NextTypeID])->isOpaque())
+ break;
+
+ // Set a type.
+ if (TypeList[NextTypeID] == 0)
+ TypeList[NextTypeID] = StructType::createNamed(Context, "");
+
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ EltTys.push_back(Elt);
+ else
+ break;
+ }
+
+ if (EltTys.size() != Record.size()-1)
+ break; // Not all elements are ready.
+
+ cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
+ ResultTy = TypeList[NextTypeID];
+ TypeList[NextTypeID] = 0;
+ break;
+ }
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ if ((ResultTy = getTypeByIDOrNull(Record[0])))
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ ArgTys.push_back(Elt);
+ else
+ break;
+ }
+ if (ArgTys.size()+3 != Record.size())
+ break; // Something was null.
+ if ((ResultTy = getTypeByIDOrNull(Record[2])))
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ break;
+ }
+
+ if (NextTypeID >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ if (ResultTy && TypeList[NextTypeID] == 0) {
+ ++NumTypesRead;
+ ReadAnyTypes = true;
+
+ TypeList[NextTypeID] = ResultTy;
+ }
+
+ ++NextTypeID;
+ }
+}
+
+
+bool BitcodeReader::ParseOldTypeSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this type table.
+ std::string TypeName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type symbol table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
+ if (ConvertToString(Record, 1, TypeName))
+ return Error("Invalid TST_ENTRY record");
+ unsigned TypeID = Record[0];
+ if (TypeID >= TypeList.size())
+ return Error("Invalid Type ID in TST_ENTRY record");
+
+ // Only apply the type name to a struct type with no name.
+ if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
+ if (!STy->isAnonymous() && !STy->hasName())
+ STy->setName(TypeName);
+ TypeName.clear();
+ break;
+ }
+ }
+}
+
+bool BitcodeReader::ParseValueSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ SmallString<128> ValueName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of value symbol table block");
+ return false;
+ }
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_ENTRY record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return Error("Invalid Value ID in VST_ENTRY record");
+ Value *V = ValueList[ValueID];
+
+ V->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ case bitc::VST_CODE_BBENTRY: {
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_BBENTRY record");
+ BasicBlock *BB = getBasicBlock(Record[0]);
+ if (BB == 0)
+ return Error("Invalid BB ID in VST_BBENTRY record");
+
+ BB->setName(StringRef(ValueName.data(), ValueName.size()));
+ ValueName.clear();
+ break;
+ }
+ }
+ }
+}
+
+bool BitcodeReader::ParseMetadata() {
+ unsigned NextMDValueNo = MDValueList.size();
+
+ if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+        return Error("Error at end of METADATA block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ bool IsFunctionLocal = false;
+ // Read a record.
+ Record.clear();
+ Code = Stream.ReadRecord(Code, Record);
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_NAME: {
+      // Read the name of the named metadata.
+ unsigned NameLength = Record.size();
+ SmallString<8> Name;
+ Name.resize(NameLength);
+ for (unsigned i = 0; i != NameLength; ++i)
+ Name[i] = Record[i];
+ Record.clear();
+ Code = Stream.ReadCode();
+
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE.
+ unsigned NextBitCode = Stream.ReadRecord(Code, Record);
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+ if (MD == 0)
+ return Error("Malformed metadata record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_FN_NODE:
+ IsFunctionLocal = true;
+ // fall-through
+ case bitc::METADATA_NODE: {
+ if (Record.size() % 2 == 1)
+ return Error("Invalid METADATA_NODE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Value*, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ const Type *Ty = getTypeByID(Record[i]);
+ if (!Ty) return Error("Invalid METADATA_NODE record");
+ if (Ty->isMetadataTy())
+ Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
+ else if (!Ty->isVoidTy())
+ Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
+ else
+ Elts.push_back(NULL);
+ }
+ Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal);
+ IsFunctionLocal = false;
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_STRING: {
+ unsigned MDStringLength = Record.size();
+ SmallString<8> String;
+ String.resize(MDStringLength);
+ for (unsigned i = 0; i != MDStringLength; ++i)
+ String[i] = Record[i];
+ Value *V = MDString::get(Context,
+ StringRef(String.data(), String.size()));
+ MDValueList.AssignValue(V, NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_KIND: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || RecordLength < 2)
+ return Error("Invalid METADATA_KIND record");
+ SmallString<8> Name;
+ Name.resize(RecordLength-1);
+ unsigned Kind = Record[0];
+ for (unsigned i = 1; i != RecordLength; ++i)
+ Name[i-1] = Record[i];
+
+ unsigned NewKind = TheModule->getMDKindID(Name.str());
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return Error("Conflicting METADATA_KIND records");
+ break;
+ }
+ }
+ }
+}
+
+/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// the LSB for dense VBR encoding.
+static uint64_t DecodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
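+// Worked example (editor's note): the encoding maps x >= 0 to 2*x and x < 0 to
+// 2*(-x) + 1, with the minimum 64-bit value encoded as 1. Thus
+// DecodeSignRotatedValue(0) == 0, (2) == 1, (4) == 2, (3) == (uint64_t)-1,
+// (5) == (uint64_t)-2, and (1) == 1ULL << 63.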
+
+/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
+/// values and aliases that we can.
+bool BitcodeReader::ResolveGlobalAndAliasInits() {
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+
+ GlobalInitWorklist.swap(GlobalInits);
+ AliasInitWorklist.swap(AliasInits);
+
+ while (!GlobalInitWorklist.empty()) {
+ unsigned ValID = GlobalInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+      // Not ready to resolve this yet; it requires something later in the file.
+ GlobalInits.push_back(GlobalInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ GlobalInitWorklist.back().first->setInitializer(C);
+ else
+ return Error("Global variable initializer is not a constant!");
+ }
+ GlobalInitWorklist.pop_back();
+ }
+
+ while (!AliasInitWorklist.empty()) {
+ unsigned ValID = AliasInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ AliasInits.push_back(AliasInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ AliasInitWorklist.back().first->setAliasee(C);
+ else
+ return Error("Alias initializer is not a constant!");
+ }
+ AliasInitWorklist.pop_back();
+ }
+ return false;
+}
+
+bool BitcodeReader::ParseConstants() {
+ if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ const Type *CurTy = Type::getInt32Ty(Context);
+ unsigned NextCstNo = ValueList.size();
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK)
+ break;
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Value *V = 0;
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
+ default: // Default behavior: unknown constant
+ case bitc::CST_CODE_UNDEF: // UNDEF
+ V = UndefValue::get(CurTy);
+ break;
+ case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
+ if (Record.empty())
+ return Error("Malformed CST_SETTYPE record");
+ if (Record[0] >= TypeList.size())
+ return Error("Invalid Type ID in CST_SETTYPE record");
+ CurTy = TypeList[Record[0]];
+ continue; // Skip the ValueList manipulation.
+ case bitc::CST_CODE_NULL: // NULL
+ V = Constant::getNullValue(CurTy);
+ break;
+ case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid CST_INTEGER record");
+ V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+ break;
+ case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
+ if (!CurTy->isIntegerTy() || Record.empty())
+ return Error("Invalid WIDE_INTEGER record");
+
+ unsigned NumWords = Record.size();
+ SmallVector<uint64_t, 8> Words;
+ Words.resize(NumWords);
+ for (unsigned i = 0; i != NumWords; ++i)
+ Words[i] = DecodeSignRotatedValue(Record[i]);
+ V = ConstantInt::get(Context,
+ APInt(cast<IntegerType>(CurTy)->getBitWidth(),
+ NumWords, &Words[0]));
+ break;
+ }
+ case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
+ if (Record.empty())
+ return Error("Invalid FLOAT record");
+ if (CurTy->isFloatTy())
+ V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
+ else if (CurTy->isDoubleTy())
+ V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
+ else if (CurTy->isX86_FP80Ty()) {
+ // Bits are not stored the same way as a normal i80 APInt, compensate.
+ uint64_t Rearrange[2];
+ Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+ Rearrange[1] = Record[0] >> 48;
+ V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange)));
+ } else if (CurTy->isFP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true));
+ else if (CurTy->isPPC_FP128Ty())
+ V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0])));
+ else
+ V = UndefValue::get(CurTy);
+ break;
+ }
+
+ case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+
+ if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+ STy->getElementType(i)));
+ V = ConstantStruct::get(STy, Elts);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+ const Type *EltTy = VTy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantVector::get(Elts);
+ } else {
+ V = UndefValue::get(CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_STRING: { // STRING: [values]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ Elts.push_back(Constant::getNullValue(EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+ int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown binop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+ unsigned Flags = 0;
+ if (Record.size() >= 4) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[3] & (1 << bitc::PEO_EXACT))
+ Flags |= SDivOperator::IsExact;
+ }
+ }
+ V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
+ if (Record.size() < 3) return Error("Invalid CE_CAST record");
+ int Opc = GetDecodedCastOpcode(Record[0]);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown cast.
+ } else {
+ const Type *OpTy = getTypeByID(Record[1]);
+ if (!OpTy) return Error("Invalid CE_CAST record");
+ Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
+ V = ConstantExpr::getCast(Opc, Op, CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_INBOUNDS_GEP:
+ case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
+ if (Record.size() & 1) return Error("Invalid CE_GEP record");
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ const Type *ElTy = getTypeByID(Record[i]);
+ if (!ElTy) return Error("Invalid CE_GEP record");
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ }
+ if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
+ V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
+ else
+ V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1],
+ Elts.size()-1);
+ break;
+ }
+ case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
+ if (Record.size() < 3) return Error("Invalid CE_SELECT record");
+ V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
+ Type::getInt1Ty(Context)),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
+ break;
+ case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ V = ConstantExpr::getExtractElement(Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_INSERTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
+ OpTy->getElementType());
+      Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+                                                  Type::getInt32Ty(Context));
+ V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_SHUFFLEVEC record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ OpTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
+ const VectorType *RTy = dyn_cast<VectorType>(CurTy);
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (Record.size() < 4 || RTy == 0 || OpTy == 0)
+ return Error("Invalid CE_SHUFVEC_EX record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+ const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ RTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
+ if (Record.size() < 4) return Error("Invalid CE_CMP record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ if (OpTy == 0) return Error("Invalid CE_CMP record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+
+ if (OpTy->isFPOrFPVectorTy())
+ V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
+ else
+ V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_INLINEASM: {
+ if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ std::string AsmStr, ConstrStr;
+ bool HasSideEffects = Record[0] & 1;
+ bool IsAlignStack = Record[0] >> 1;
+ unsigned AsmStrSize = Record[1];
+ if (2+AsmStrSize >= Record.size())
+ return Error("Invalid INLINEASM record");
+ unsigned ConstStrSize = Record[2+AsmStrSize];
+ if (3+AsmStrSize+ConstStrSize > Record.size())
+ return Error("Invalid INLINEASM record");
+
+ for (unsigned i = 0; i != AsmStrSize; ++i)
+ AsmStr += (char)Record[2+i];
+ for (unsigned i = 0; i != ConstStrSize; ++i)
+ ConstrStr += (char)Record[3+AsmStrSize+i];
+ const PointerType *PTy = cast<PointerType>(CurTy);
+ V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+ AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+ break;
+ }
+ case bitc::CST_CODE_BLOCKADDRESS:{
+ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+ const Type *FnTy = getTypeByID(Record[0]);
+ if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ Function *Fn =
+ dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
+ if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+
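+      // The target block may not have been parsed yet (function bodies are
+      // deserialized lazily), so create a placeholder global and record it;
+      // ParseFunctionBody swaps in the real BlockAddress once the blocks of
+      // this function are available.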
+ GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+ Type::getInt8Ty(Context),
+ false, GlobalValue::InternalLinkage,
+ 0, "");
+ BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+ V = FwdRef;
+ break;
+ }
+ }
+
+ ValueList.AssignValue(V, NextCstNo);
+ ++NextCstNo;
+ }
+
+ if (NextCstNo != ValueList.size())
+ return Error("Invalid constant reference!");
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of constants block");
+
+ // Once all the constants have been read, go through and resolve forward
+ // references.
+ ValueList.ResolveConstantForwardRefs();
+ return false;
+}
+
+/// RememberAndSkipFunctionBody - When we see the block for a function body,
+/// remember where it is and then skip it. This lets us lazily deserialize the
+/// functions.
+bool BitcodeReader::RememberAndSkipFunctionBody() {
+ // Get the function we are talking about.
+ if (FunctionsWithBodies.empty())
+ return Error("Insufficient function protos");
+
+ Function *Fn = FunctionsWithBodies.back();
+ FunctionsWithBodies.pop_back();
+
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredFunctionInfo[Fn] = CurBit;
+
+ // Skip over the function block for now.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ return false;
+}
+
+bool BitcodeReader::ParseModule() {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ std::vector<std::string> SectionTable;
+ std::vector<std::string> GCTable;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+ if (!FunctionsWithBodies.empty())
+ return Error("Too few function bodies found");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function* NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+
+      // Force deallocation of memory for these vectors to favor clients that
+      // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::PARAMATTR_BLOCK_ID:
+ if (ParseAttributeBlock())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID_NEW:
+ if (ParseTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID_OLD:
+ if (ParseOldTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
+ if (ParseOldTypeSymbolTable())
+ return true;
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable())
+ return true;
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants() || ResolveGlobalAndAliasInits())
+ return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata())
+ return true;
+ break;
+ case bitc::FUNCTION_BLOCK_ID:
+ // If this is the first function body we've seen, reverse the
+ // FunctionsWithBodies list.
+ if (!HasReversedFunctionsWithBodies) {
+ std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
+ HasReversedFunctionsWithBodies = true;
+ }
+
+ if (RememberAndSkipFunctionBody())
+ return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ TheModule->setTargetTriple(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DATALAYOUT record");
+ TheModule->setDataLayout(S);
+ break;
+ }
+ case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_ASM record");
+ TheModule->setModuleInlineAsm(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DEPLIB record");
+ TheModule->addLibrary(S);
+ break;
+ }
+ case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_SECTIONNAME record");
+ SectionTable.push_back(S);
+ break;
+ }
+    case bitc::MODULE_CODE_GCNAME: { // GCNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_GCNAME record");
+ GCTable.push_back(S);
+ break;
+ }
+ // GLOBALVAR: [pointer type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ case bitc::MODULE_CODE_GLOBALVAR: {
+ if (Record.size() < 6)
+ return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ if (!Ty->isPointerTy())
+ return Error("Global not a pointer type!");
+ unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ bool isConstant = Record[1];
+ GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]);
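+      // Alignment is encoded as log2(alignment)+1, with 0 meaning no
+      // explicit alignment; the section index below is 1-based, with 0
+      // meaning no section.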
+ unsigned Alignment = (1 << Record[4]) >> 1;
+ std::string Section;
+ if (Record[5]) {
+ if (Record[5]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Section = SectionTable[Record[5]-1];
+ }
+ GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
+ if (Record.size() > 6)
+ Visibility = GetDecodedVisibility(Record[6]);
+ bool isThreadLocal = false;
+ if (Record.size() > 7)
+ isThreadLocal = Record[7];
+
+ bool UnnamedAddr = false;
+ if (Record.size() > 8)
+ UnnamedAddr = Record[8];
+
+ GlobalVariable *NewGV =
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
+ isThreadLocal, AddressSpace);
+ NewGV->setAlignment(Alignment);
+ if (!Section.empty())
+ NewGV->setSection(Section);
+ NewGV->setVisibility(Visibility);
+ NewGV->setThreadLocal(isThreadLocal);
+ NewGV->setUnnamedAddr(UnnamedAddr);
+
+ ValueList.push_back(NewGV);
+
+ // Remember which value to use for the global initializer.
+ if (unsigned InitID = Record[2])
+ GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
+ break;
+ }
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
+ // alignment, section, visibility, gc, unnamed_addr]
+ case bitc::MODULE_CODE_FUNCTION: {
+ if (Record.size() < 8)
+ return Error("Invalid MODULE_CODE_FUNCTION record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+ const FunctionType *FTy =
+ dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
+ if (!FTy)
+ return Error("Function not a pointer to function type!");
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ "", TheModule);
+
+ Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
+ bool isProto = Record[2];
+ Func->setLinkage(GetDecodedLinkage(Record[3]));
+ Func->setAttributes(getAttributes(Record[4]));
+
+ Func->setAlignment((1 << Record[5]) >> 1);
+ if (Record[6]) {
+ if (Record[6]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Func->setSection(SectionTable[Record[6]-1]);
+ }
+ Func->setVisibility(GetDecodedVisibility(Record[7]));
+ if (Record.size() > 8 && Record[8]) {
+        if (Record[8]-1 >= GCTable.size())
+ return Error("Invalid GC ID");
+ Func->setGC(GCTable[Record[8]-1].c_str());
+ }
+ bool UnnamedAddr = false;
+ if (Record.size() > 9)
+ UnnamedAddr = Record[9];
+ Func->setUnnamedAddr(UnnamedAddr);
+ ValueList.push_back(Func);
+
+ // If this is a function with a body, remember the prototype we are
+ // creating now, so that we can match up the body with them later.
+ if (!isProto)
+ FunctionsWithBodies.push_back(Func);
+ break;
+ }
+ // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ case bitc::MODULE_CODE_ALIAS: {
+ if (Record.size() < 3)
+ return Error("Invalid MODULE_ALIAS record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_ALIAS record");
+ if (!Ty->isPointerTy())
+ return Error("Function not a pointer type!");
+
+ GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
+ "", 0, TheModule);
+ // Old bitcode files didn't have visibility field.
+ if (Record.size() > 3)
+ NewGA->setVisibility(GetDecodedVisibility(Record[3]));
+ ValueList.push_back(NewGA);
+ AliasInits.push_back(std::make_pair(NewGA, Record[1]));
+ break;
+ }
+ /// MODULE_CODE_PURGEVALS: [numvals]
+ case bitc::MODULE_CODE_PURGEVALS:
+ // Trim down the value list to the specified size.
+ if (Record.size() < 1 || Record[0] > ValueList.size())
+ return Error("Invalid MODULE_PURGEVALS record");
+ ValueList.shrinkTo(Record[0]);
+ break;
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseBitcodeInto(Module *M) {
+ TheModule = 0;
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
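+  // The wrapper is five little-endian 32-bit words: magic, version, bitcode
+  // offset, bitcode size, and CPU type.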
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
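+  // The signature is 'B', 'C', then the bytes 0xC0 0xDE read as four 4-bit
+  // fields (the bitstream delivers the low nibble of each byte first).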
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK) {
+
+      // The ranlib in Xcode 4 aligns archive members by appending newlines
+      // to the end of them. If the file size is a multiple of 4 but not 8,
+      // we have to read and ignore these final 4 bytes :-(
+ if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 &&
+ Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
+ Stream.AtEndOfStream())
+ return false;
+
+ return Error("Invalid record at top-level");
+ }
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::MODULE_BLOCK_ID:
+ // Reject multiple MODULE_BLOCK's in a single bitstream.
+ if (TheModule)
+ return Error("Multiple MODULE_BLOCKs in same stream");
+ TheModule = M;
+ if (ParseModule())
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
+bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ Triple = S;
+ break;
+ }
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseTriple(std::string &Triple) {
+ if (Buffer->getBufferSize() & 3)
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK)
+ return Error("Invalid record at top-level");
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::MODULE_BLOCK_ID:
+ if (ParseModuleTriple(Triple))
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ParseMetadataAttachment - Parse metadata attachments.
+bool BitcodeReader::ParseMetadataAttachment() {
+ if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+ while(1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ break;
+ }
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+ // Read a metadata attachment record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_ATTACHMENT: {
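+      // Record layout: [instruction#, (kind#, mdnode#)*].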
+ unsigned RecordLength = Record.size();
+ if (Record.empty() || (RecordLength - 1) % 2 == 1)
+ return Error ("Invalid METADATA_ATTACHMENT reader!");
+ Instruction *Inst = InstructionList[Record[0]];
+ for (unsigned i = 1; i != RecordLength; i = i+2) {
+ unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I =
+ MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return Error("Invalid metadata kind ID");
+ Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
+ Inst->setMetadata(I->second, cast<MDNode>(Node));
+ }
+ break;
+ }
+ }
+ }
+ return false;
+}
+
+/// ParseFunctionBody - Lazily parse the specified function body block.
+bool BitcodeReader::ParseFunctionBody(Function *F) {
+ if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+ return Error("Malformed block record");
+
+ InstructionList.clear();
+ unsigned ModuleValueListSize = ValueList.size();
+ unsigned ModuleMDValueListSize = MDValueList.size();
+
+ // Add all the function arguments to the value table.
+ for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ValueList.push_back(I);
+
+ unsigned NextValueNo = ValueList.size();
+ BasicBlock *CurBB = 0;
+ unsigned CurBBNo = 0;
+
+ DebugLoc LastLoc;
+
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of function block");
+ break;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants()) return true;
+ NextValueNo = ValueList.size();
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable()) return true;
+ break;
+ case bitc::METADATA_ATTACHMENT_ID:
+ if (ParseMetadataAttachment()) return true;
+ break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata()) return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Instruction *I = 0;
+ unsigned BitCode = Stream.ReadRecord(Code, Record);
+ switch (BitCode) {
+ default: // Default behavior: reject
+ return Error("Unknown instruction");
+ case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
+ if (Record.size() < 1 || Record[0] == 0)
+ return Error("Invalid DECLAREBLOCKS record");
+ // Create all the basic blocks for the function.
+ FunctionBBs.resize(Record[0]);
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
+ FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ CurBB = FunctionBBs[0];
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
+ // This record indicates that the last instruction is at the same
+ // location as the previous instruction with a location.
+ I = 0;
+
+ // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+
+ if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+
+ case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
+ I = 0; // Get the last instruction emitted.
+ if (CurBB && !CurBB->empty())
+ I = &CurBB->back();
+ else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
+ !FunctionBBs[CurBBNo-1]->empty())
+ I = &FunctionBBs[CurBBNo-1]->back();
+ if (I == 0 || Record.size() < 4)
+ return Error("Invalid FUNC_CODE_DEBUG_LOC record");
+
+ unsigned Line = Record[0], Col = Record[1];
+ unsigned ScopeID = Record[2], IAID = Record[3];
+
+ MDNode *Scope = 0, *IA = 0;
+ if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
+ if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
+ LastLoc = DebugLoc::get(Line, Col, Scope, IA);
+ I->setDebugLoc(LastLoc);
+ I = 0;
+ continue;
+ }
+
+ case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 > Record.size())
+ return Error("Invalid BINOP record");
+
+ int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
+ if (Opc == -1) return Error("Invalid BINOP record");
+ I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (Opc == Instruction::Add ||
+ Opc == Instruction::Sub ||
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
+ if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
+ if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+ cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[OpNum] & (1 << bitc::PEO_EXACT))
+ cast<BinaryOperator>(I)->setIsExact(true);
+ }
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid CAST record");
+
+ const Type *ResTy = getTypeByID(Record[OpNum]);
+ int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
+ if (Opc == -1 || ResTy == 0)
+ return Error("Invalid CAST record");
+ I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP:
+ case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+ unsigned OpNum = 0;
+ Value *BasePtr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ return Error("Invalid GEP record");
+
+ SmallVector<Value*, 16> GEPIdx;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid GEP record");
+ GEPIdx.push_back(Op);
+ }
+
+ I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end());
+ InstructionList.push_back(I);
+ if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
+ cast<GetElementPtrInst>(I)->setIsInBounds(true);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTVAL: {
+ // EXTRACTVAL: [opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid EXTRACTVAL record");
+
+ SmallVector<unsigned, 4> EXTRACTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid EXTRACTVAL index");
+ EXTRACTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTVAL: {
+ // INSERTVAL: [opty, opval, opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid INSERTVAL record");
+ Value *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ return Error("Invalid INSERTVAL record");
+
+ SmallVector<unsigned, 4> INSERTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid INSERTVAL index");
+ INSERTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval]
+ // obsolete form of select
+ // handles select i1 ... in old bitcode
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValue(Record, OpNum, Type::getInt1Ty(Context), Cond))
+ return Error("Invalid SELECT record");
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred]
+ // new form of select
+ // handles select i1 or select [N x i1]
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Cond))
+ return Error("Invalid SELECT record");
+
+ // select condition can be either i1 or [N x i1]
+ if (const VectorType* vector_type =
+ dyn_cast<const VectorType>(Cond->getType())) {
+ // expect <n x i1>
+ if (vector_type->getElementType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ } else {
+ // expect i1
+ if (Cond->getType() != Type::getInt1Ty(Context))
+ return Error("Invalid SELECT condition type");
+ }
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid EXTRACTELT record");
+ I = ExtractElementInst::Create(Vec, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Elt, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum,
+ cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
+ getValue(Record, OpNum, Type::getInt32Ty(Context), Idx))
+ return Error("Invalid INSERTELT record");
+ I = InsertElementInst::Create(Vec, Elt, Idx);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec1, *Vec2, *Mask;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+ getValue(Record, OpNum, Vec1->getType(), Vec2))
+ return Error("Invalid SHUFFLEVEC record");
+
+ if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
+ return Error("Invalid SHUFFLEVEC record");
+ I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred]
+ // Old form of ICmp/FCmp returning bool
+ // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were
+ // both legal on vectors but had different behaviour.
+ case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred]
+ // FCmp/ICmp returning bool or vector of bool
+
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid CMP record");
+
+ if (LHS->getType()->isFPOrFPVectorTy())
+ I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else
+ I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
+ {
+ unsigned Size = Record.size();
+ if (Size == 0) {
+ I = ReturnInst::Create(Context);
+ InstructionList.push_back(I);
+ break;
+ }
+
+ unsigned OpNum = 0;
+ Value *Op = NULL;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid RET record");
+ if (OpNum != Record.size())
+ return Error("Invalid RET record");
+
+ I = ReturnInst::Create(Context, Op);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
+ if (Record.size() != 1 && Record.size() != 3)
+ return Error("Invalid BR record");
+ BasicBlock *TrueDest = getBasicBlock(Record[0]);
+ if (TrueDest == 0)
+ return Error("Invalid BR record");
+
+ if (Record.size() == 1) {
+ I = BranchInst::Create(TrueDest);
+ InstructionList.push_back(I);
+ }
+ else {
+ BasicBlock *FalseDest = getBasicBlock(Record[1]);
+ Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context));
+ if (FalseDest == 0 || Cond == 0)
+ return Error("Invalid BR record");
+ I = BranchInst::Create(TrueDest, FalseDest, Cond);
+ InstructionList.push_back(I);
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
+ if (Record.size() < 3 || (Record.size() & 1) == 0)
+ return Error("Invalid SWITCH record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Cond = getFnValueByID(Record[1], OpTy);
+ BasicBlock *Default = getBasicBlock(Record[2]);
+ if (OpTy == 0 || Cond == 0 || Default == 0)
+ return Error("Invalid SWITCH record");
+ unsigned NumCases = (Record.size()-3)/2;
+ SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ InstructionList.push_back(SI);
+ for (unsigned i = 0, e = NumCases; i != e; ++i) {
+ ConstantInt *CaseVal =
+ dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
+ BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
+ if (CaseVal == 0 || DestBB == 0) {
+ delete SI;
+ return Error("Invalid SWITCH record!");
+ }
+ SI->addCase(CaseVal, DestBB);
+ }
+ I = SI;
+ break;
+ }
+ case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
+ if (Record.size() < 2)
+ return Error("Invalid INDIRECTBR record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Address = getFnValueByID(Record[1], OpTy);
+ if (OpTy == 0 || Address == 0)
+ return Error("Invalid INDIRECTBR record");
+ unsigned NumDests = Record.size()-2;
+ IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
+ InstructionList.push_back(IBI);
+ for (unsigned i = 0, e = NumDests; i != e; ++i) {
+ if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) {
+ IBI->addDestination(DestBB);
+ } else {
+ delete IBI;
+ return Error("Invalid INDIRECTBR record!");
+ }
+ }
+ I = IBI;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INVOKE: {
+ // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
+ if (Record.size() < 4) return Error("Invalid INVOKE record");
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+ BasicBlock *NormalBB = getBasicBlock(Record[2]);
+ BasicBlock *UnwindBB = getBasicBlock(Record[3]);
+
+ unsigned OpNum = 4;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid INVOKE record");
+
+ const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = !CalleeTy ? 0 :
+ dyn_cast<FunctionType>(CalleeTy->getElementType());
+
+ // Check that the right number of fixed parameters are here.
+ if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
+ Record.size() < OpNum+FTy->getNumParams())
+ return Error("Invalid INVOKE record");
+
+ SmallVector<Value*, 16> Ops;
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Ops.back() == 0) return Error("Invalid INVOKE record");
+ }
+
+ if (!FTy->isVarArg()) {
+ if (Record.size() != OpNum)
+ return Error("Invalid INVOKE record");
+ } else {
+ // Read type/value pairs for varargs params.
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid INVOKE record");
+ Ops.push_back(Op);
+ }
+ }
+
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
+ InstructionList.push_back(I);
+ cast<InvokeInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo));
+ cast<InvokeInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
+ I = new UnwindInst(Context);
+ InstructionList.push_back(I);
+ break;
+ case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
+ I = new UnreachableInst(Context);
+ InstructionList.push_back(I);
+ break;
+ case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
+ if (Record.size() < 1 || ((Record.size()-1)&1))
+ return Error("Invalid PHI record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid PHI record");
+
+ PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
+ InstructionList.push_back(PN);
+
+ for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
+ Value *V = getFnValueByID(Record[1+i], Ty);
+ BasicBlock *BB = getBasicBlock(Record[2+i]);
+ if (!V || !BB) return Error("Invalid PHI record");
+ PN->addIncoming(V, BB);
+ }
+ I = PN;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
+ if (Record.size() != 4)
+ return Error("Invalid ALLOCA record");
+ const PointerType *Ty =
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ const Type *OpTy = getTypeByID(Record[1]);
+ Value *Size = getFnValueByID(Record[2], OpTy);
+ unsigned Align = Record[3];
+ if (!Ty || !Size) return Error("Invalid ALLOCA record");
+ I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid LOAD record");
+
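+      // Record[OpNum] holds the alignment (encoded as log2(align)+1, with 0
+      // meaning none) and Record[OpNum+1] holds the volatile flag.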
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid STORE record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CALL: {
+ // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
+ if (Record.size() < 3)
+ return Error("Invalid CALL record");
+
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
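+      // Bit 0 of CCInfo is the tail-call flag; the remaining bits encode the
+      // calling convention (see the setTailCall/setCallingConv calls below).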
+
+ unsigned OpNum = 2;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid CALL record");
+
+ const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = 0;
+ if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+ if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
+ return Error("Invalid CALL record");
+
+ SmallVector<Value*, 16> Args;
+ // Read the fixed params.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ if (FTy->getParamType(i)->isLabelTy())
+ Args.push_back(getBasicBlock(Record[OpNum]));
+ else
+ Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Args.back() == 0) return Error("Invalid CALL record");
+ }
+
+ // Read type/value pairs for varargs params.
+ if (!FTy->isVarArg()) {
+ if (OpNum != Record.size())
+ return Error("Invalid CALL record");
+ } else {
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid CALL record");
+ Args.push_back(Op);
+ }
+ }
+
+ I = CallInst::Create(Callee, Args);
+ InstructionList.push_back(I);
+ cast<CallInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CCInfo>>1));
+ cast<CallInst>(I)->setTailCall(CCInfo & 1);
+ cast<CallInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
+ if (Record.size() < 3)
+ return Error("Invalid VAARG record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Op = getFnValueByID(Record[1], OpTy);
+ const Type *ResTy = getTypeByID(Record[2]);
+ if (!OpTy || !Op || !ResTy)
+ return Error("Invalid VAARG record");
+ I = new VAArgInst(Op, ResTy);
+ InstructionList.push_back(I);
+ break;
+ }
+ }
+
+ // Add instruction to end of current BB. If there is no current BB, reject
+ // this file.
+ if (CurBB == 0) {
+ delete I;
+ return Error("Invalid instruction with no BB");
+ }
+ CurBB->getInstList().push_back(I);
+
+ // If this was a terminator instruction, move to the next block.
+ if (isa<TerminatorInst>(I)) {
+ ++CurBBNo;
+ CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ }
+
+ // Non-void values get registered in the value table for future use.
+ if (I && !I->getType()->isVoidTy())
+ ValueList.AssignValue(I, NextValueNo++);
+ }
+
+ // Check the function list for unresolved values.
+ if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
+ if (A->getParent() == 0) {
+ // We found at least one unresolved value. Nuke them all to avoid leaks.
+ for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
+ if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ delete A;
+ }
+ }
+ return Error("Never resolved value found in function!");
+ }
+ }
+
+ // FIXME: Check for unresolved forward-declared metadata references
+ // and clean up leaks.
+
+ // See if anything took the address of blocks in this function. If so,
+ // resolve them now.
+ DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
+ BlockAddrFwdRefs.find(F);
+ if (BAFRI != BlockAddrFwdRefs.end()) {
+ std::vector<BlockAddrRefTy> &RefList = BAFRI->second;
+ for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
+ unsigned BlockIdx = RefList[i].first;
+ if (BlockIdx >= FunctionBBs.size())
+ return Error("Invalid blockaddress block #");
+
+ GlobalVariable *FwdRef = RefList[i].second;
+ FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
+ FwdRef->eraseFromParent();
+ }
+
+ BlockAddrFwdRefs.erase(BAFRI);
+ }
+
+ // Trim the value list down to the size it was before we parsed this function.
+ ValueList.shrinkTo(ModuleValueListSize);
+ MDValueList.shrinkTo(ModuleMDValueListSize);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GVMaterializer implementation
+//===----------------------------------------------------------------------===//
+
+
+bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ return F->isDeclaration() &&
+ DeferredFunctionInfo.count(const_cast<Function*>(F));
+ }
+ return false;
+}
+
+bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ Function *F = dyn_cast<Function>(GV);
+  // If it's not a function or is already materialized, ignore the request.
+ if (!F || !F->isMaterializable()) return false;
+
+ DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
+ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+
+ // Move the bit stream to the saved position of the deferred function body.
+ Stream.JumpToBit(DFII->second);
+
+ if (ParseFunctionBody(F)) {
+ if (ErrInfo) *ErrInfo = ErrorString;
+ return true;
+ }
+
+ // Upgrade any old intrinsic calls in the function.
+ for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
+ E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ }
+ }
+
+ return false;
+}
+
+bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F || F->isDeclaration())
+ return false;
+ return DeferredFunctionInfo.count(const_cast<Function*>(F));
+}
+
+void BitcodeReader::Dematerialize(GlobalValue *GV) {
+ Function *F = dyn_cast<Function>(GV);
+ // If this function isn't dematerializable, this is a noop.
+ if (!F || !isDematerializable(F))
+ return;
+
+ assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
+
+ // Just forget the function body, we can remat it later.
+ F->deleteBody();
+}
+
+
+bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
+ assert(M == TheModule &&
+ "Can only Materialize the Module this BitcodeReader is attached to.");
+ // Iterate over the module, deserializing any functions that are still on
+ // disk.
+ for (Module::iterator F = TheModule->begin(), E = TheModule->end();
+ F != E; ++F)
+ if (F->isMaterializable() &&
+ Materialize(F, ErrInfo))
+ return true;
+
+ // Upgrade any intrinsic calls that slipped through (should not happen!) and
+ // delete the old functions to clean up. We can't do this unless the entire
+ // module is materialized because there could always be another function body
+ // with calls to the old function.
+ for (std::vector<std::pair<Function*, Function*> >::iterator I =
+ UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ if (!I->first->use_empty())
+ I->first->replaceAllUsesWith(I->second);
+ I->first->eraseFromParent();
+ }
+ }
+ std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+
+ // Check debug info intrinsics.
+ CheckDebugInfoIntrinsics(TheModule);
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// External interface
+//===----------------------------------------------------------------------===//
+
+/// getLazyBitcodeModule - lazy function-at-a-time loading from a file.
+///
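+/// A minimal usage sketch (illustrative only; error handling abbreviated),
+/// assuming 'Buf' is a MemoryBuffer and 'Context' an LLVMContext obtained
+/// by the caller, and that the module defines a function named "foo":
+///
+///   std::string ErrMsg;
+///   Module *M = getLazyBitcodeModule(Buf, Context, &ErrMsg);
+///   if (M)
+///     if (Function *F = M->getFunction("foo"))
+///       M->Materialize(F, &ErrMsg); // Parses just this function's body.
+///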
+Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(Buffer->getBufferIdentifier(), Context);
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete M; // Also deletes R.
+ return 0;
+ }
+ // Have the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(true);
+ return M;
+}
+
+/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+/// If an error occurs, return null and fill in *ErrMsg if non-null.
+Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+ std::string *ErrMsg){
+ Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+ if (!M) return 0;
+
+ // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
+ // there was an error.
+ static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false);
+
+ // Read in the entire module, and destroy the BitcodeReader.
+ if (M->MaterializeAllPermanently(ErrMsg)) {
+ delete M;
+ return 0;
+ }
+
+ return M;
+}
+
+std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ // Don't let the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(false);
+
+ std::string Triple("");
+ if (R->ParseTriple(Triple))
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete R;
+ return Triple;
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
new file mode 100644
index 000000000000..1b3bf1a1854a
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
@@ -0,0 +1,278 @@
+//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITCODE_READER_H
+#define BITCODE_READER_H
+
+#include "llvm/GVMaterializer.h"
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+ class MemoryBuffer;
+ class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+  /// bulk instead of resolving them one at a time. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant, the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderMDValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderMDValueList {
+ std::vector<WeakVH> MDValuePtrs;
+
+ LLVMContext &Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Value *V) { MDValuePtrs.push_back(V); }
+ void clear() { MDValuePtrs.clear(); }
+ Value *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Value *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Value *getValueFwdRef(unsigned Idx);
+ void AssignValue(Value *V, unsigned Idx);
+};
+
+class BitcodeReader : public GVMaterializer {
+ LLVMContext &Context;
+ Module *TheModule;
+ MemoryBuffer *Buffer;
+ bool BufferOwned;
+ BitstreamReader StreamFile;
+ BitstreamCursor Stream;
+
+ const char *ErrorString;
+
+ std::vector<Type*> TypeList;
+ BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ SmallVector<Instruction *, 64> InstructionList;
+
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttrListPtr> MAttributes;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ // After the module header has been read, the FunctionsWithBodies list is
+ // reversed. This keeps track of whether we've done this yet.
+ bool HasReversedFunctionsWithBodies;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find the deferred function body in
+  /// the stream.
+ DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+ /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
+ /// are resolved lazily when functions are loaded.
+ typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
+ DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+public:
+ explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
+ ErrorString(0), ValueList(C), MDValueList(C) {
+ HasReversedFunctionsWithBodies = false;
+ }
+ ~BitcodeReader() {
+ FreeState();
+ }
+
+ void FreeState();
+
+ /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
+ /// when the reader is destroyed.
+ void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+
+ virtual bool isMaterializable(const GlobalValue *GV) const;
+ virtual bool isDematerializable(const GlobalValue *GV) const;
+ virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
+ virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0);
+ virtual void Dematerialize(GlobalValue *GV);
+
+ bool Error(const char *Str) {
+ ErrorString = Str;
+ return true;
+ }
+ const char *getErrorString() const { return ErrorString; }
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ bool ParseBitcodeInto(Module *M);
+
+ /// @brief Cheap mechanism to just extract module triple
+ /// @returns true if an error occurred.
+ bool ParseTriple(std::string &Triple);
+private:
+ Type *getTypeByID(unsigned ID);
+ Type *getTypeByIDOrNull(unsigned ID);
+ Value *getFnValueByID(unsigned ID, const Type *Ty) {
+ if (Ty && Ty->isMetadataTy())
+ return MDValueList.getValueFwdRef(ID);
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttrListPtr getAttributes(unsigned i) const {
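+    // Attribute index 0 means "no attributes"; for i == 0 the unsigned wrap
+    // of i-1 makes the bounds check below fail, yielding an empty list.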
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttrListPtr();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, 0);
+ return ResVal == 0;
+ } else if (Slot == Record.size()) {
+ return true;
+ }
+
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == 0;
+ }
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ const Type *Ty, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, Ty);
+ return ResVal == 0;
+ }
+
+
+ bool ParseModule();
+ bool ParseAttributeBlock();
+ bool ParseTypeTable();
+ bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
+ bool ParseTypeTableBody();
+
+ bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
+ bool ParseValueSymbolTable();
+ bool ParseConstants();
+ bool RememberAndSkipFunctionBody();
+ bool ParseFunctionBody(Function *F);
+ bool ResolveGlobalAndAliasInits();
+ bool ParseMetadata();
+ bool ParseMetadataAttachment();
+ bool ParseModuleTriple(std::string &Triple);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 000000000000..428842246331
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,40 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+ std::string ErrorInfo;
+ raw_fd_ostream OS(Path, ErrorInfo,
+ raw_fd_ostream::F_Binary);
+
+ if (!ErrorInfo.empty())
+ return -1;
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+ int Unbuffered) {
+ raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return 0;
+}
+
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+ return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
+}
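A minimal sketch of driving this C API, assuming the era's llvm-c Core entry points LLVMModuleCreateWithName and LLVMDisposeModule:

#include "llvm-c/Core.h"
#include "llvm-c/BitWriter.h"

int main(void) {
  /* Create an empty module and serialize it to a .bc file on disk. */
  LLVMModuleRef M = LLVMModuleCreateWithName("example");
  int Failed = LLVMWriteBitcodeToFile(M, "example.bc");
  LLVMDisposeModule(M);
  return Failed ? 1 : 0; /* non-zero means the output file could not be opened */
}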
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
new file mode 100644
index 000000000000..85d67ce62b9f
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -0,0 +1,1628 @@
+//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Program.h"
+#include <cctype>
+#include <map>
+using namespace llvm;
+
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+ CurVersion = 0,
+
+ // VALUE_SYMTAB_BLOCK abbrev IDs.
+ VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ VST_ENTRY_7_ABBREV,
+ VST_ENTRY_6_ABBREV,
+ VST_BBENTRY_6_ABBREV,
+
+ // CONSTANTS_BLOCK abbrev IDs.
+ CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ CONSTANTS_INTEGER_ABBREV,
+ CONSTANTS_CE_CAST_Abbrev,
+ CONSTANTS_NULL_Abbrev,
+
+ // FUNCTION_BLOCK abbrev IDs.
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_BINOP_FLAGS_ABBREV,
+ FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_RET_VOID_ABBREV,
+ FUNCTION_INST_RET_VAL_ABBREV,
+ FUNCTION_INST_UNREACHABLE_ABBREV
+};
+
+
+static unsigned GetEncodedCastOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown cast instruction!");
+ case Instruction::Trunc : return bitc::CAST_TRUNC;
+ case Instruction::ZExt : return bitc::CAST_ZEXT;
+ case Instruction::SExt : return bitc::CAST_SEXT;
+ case Instruction::FPToUI : return bitc::CAST_FPTOUI;
+ case Instruction::FPToSI : return bitc::CAST_FPTOSI;
+ case Instruction::UIToFP : return bitc::CAST_UITOFP;
+ case Instruction::SIToFP : return bitc::CAST_SITOFP;
+ case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
+ case Instruction::FPExt : return bitc::CAST_FPEXT;
+ case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
+ case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
+ case Instruction::BitCast : return bitc::CAST_BITCAST;
+ }
+}
+
+static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown binary instruction!");
+ case Instruction::Add:
+ case Instruction::FAdd: return bitc::BINOP_ADD;
+ case Instruction::Sub:
+ case Instruction::FSub: return bitc::BINOP_SUB;
+ case Instruction::Mul:
+ case Instruction::FMul: return bitc::BINOP_MUL;
+ case Instruction::UDiv: return bitc::BINOP_UDIV;
+ case Instruction::FDiv:
+ case Instruction::SDiv: return bitc::BINOP_SDIV;
+ case Instruction::URem: return bitc::BINOP_UREM;
+ case Instruction::FRem:
+ case Instruction::SRem: return bitc::BINOP_SREM;
+ case Instruction::Shl: return bitc::BINOP_SHL;
+ case Instruction::LShr: return bitc::BINOP_LSHR;
+ case Instruction::AShr: return bitc::BINOP_ASHR;
+ case Instruction::And: return bitc::BINOP_AND;
+ case Instruction::Or: return bitc::BINOP_OR;
+ case Instruction::Xor: return bitc::BINOP_XOR;
+ }
+}
+
+static void WriteStringRecord(unsigned Code, StringRef Str,
+ unsigned AbbrevToUse, BitstreamWriter &Stream) {
+ SmallVector<unsigned, 64> Vals;
+
+ // Code: [strchar x N]
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ AbbrevToUse = 0;
+ Vals.push_back(Str[i]);
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+// Emit information about parameter attributes.
+static void WriteAttributeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+ if (Attrs.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ const AttrListPtr &A = Attrs[i];
+ for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &PAWI = A.getSlot(i);
+ Record.push_back(PAWI.Index);
+
+ // FIXME: remove in LLVM 3.0
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a
+ // 5-bit log2 encoded value. Shift the bits above the alignment up by
+ // 11 bits.
+ uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+ if (PAWI.Attrs & Attribute::Alignment)
+ FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
+ FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+
+ Record.push_back(FauxAttr);
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
+ SmallVector<uint64_t, 64> TypeVals;
+
+ // Abbrev for TYPE_CODE_POINTER.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+ unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_FUNCTION.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+ Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_ANON.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAME.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAMED.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+
+
+ // Abbrev for TYPE_CODE_ARRAY.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit an entry count so the reader can reserve space.
+ TypeVals.push_back(TypeList.size());
+ Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+ TypeVals.clear();
+
+ // Loop over all of the types, emitting each in turn.
+ for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+ const Type *T = TypeList[i];
+ int AbbrevToUse = 0;
+ unsigned Code = 0;
+
+ switch (T->getTypeID()) {
+ default: llvm_unreachable("Unknown type!");
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::IntegerTyID:
+ // INTEGER: [width]
+ Code = bitc::TYPE_CODE_INTEGER;
+ TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+ break;
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+ break;
+ }
+ case Type::FunctionTyID: {
+ const FunctionType *FT = cast<FunctionType>(T);
+ // FUNCTION: [isvararg, attrid, retty, paramty x N]
+ Code = bitc::TYPE_CODE_FUNCTION;
+ TypeVals.push_back(FT->isVarArg());
+ TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0
+ TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+ TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+ AbbrevToUse = FunctionAbbrev;
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType *ST = cast<StructType>(T);
+ // STRUCT: [ispacked, eltty x N]
+ TypeVals.push_back(ST->isPacked());
+ // Output all of the element types.
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ TypeVals.push_back(VE.getTypeID(*I));
+
+ if (ST->isAnonymous()) {
+ Code = bitc::TYPE_CODE_STRUCT_ANON;
+ AbbrevToUse = StructAnonAbbrev;
+ } else {
+ if (ST->isOpaque()) {
+ Code = bitc::TYPE_CODE_OPAQUE;
+ } else {
+ Code = bitc::TYPE_CODE_STRUCT_NAMED;
+ AbbrevToUse = StructNamedAbbrev;
+ }
+
+ // Emit the name if it is present.
+ if (!ST->getName().empty())
+ WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ StructNameAbbrev, Stream);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType *AT = cast<ArrayType>(T);
+ // ARRAY: [numelts, eltty]
+ Code = bitc::TYPE_CODE_ARRAY;
+ TypeVals.push_back(AT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+ AbbrevToUse = ArrayAbbrev;
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(T);
+ // VECTOR [numelts, eltty]
+ Code = bitc::TYPE_CODE_VECTOR;
+ TypeVals.push_back(VT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+ break;
+ }
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+ TypeVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ default: llvm_unreachable("Invalid linkage!");
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
+ }
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default: llvm_unreachable("Invalid visibility!");
+ case GlobalValue::DefaultVisibility: return 0;
+ case GlobalValue::HiddenVisibility: return 1;
+ case GlobalValue::ProtectedVisibility: return 2;
+ }
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ // Emit the list of dependent libraries for the Module.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
+
+ // Emit various pieces of data attached to a module.
+ if (!M->getTargetTriple().empty())
+ WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+ 0/*TODO*/, Stream);
+ if (!M->getDataLayout().empty())
+ WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+ 0/*TODO*/, Stream);
+ if (!M->getModuleInlineAsm().empty())
+ WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+ 0/*TODO*/, Stream);
+
+ // Emit information about sections and GC, computing how many there are. Also
+ // compute the maximum alignment value.
+ std::map<std::string, unsigned> SectionMap;
+ std::map<std::string, unsigned> GCMap;
+ unsigned MaxAlignment = 0;
+ unsigned MaxGlobalType = 0;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+
+ if (!GV->hasSection()) continue;
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[GV->getSection()];
+ if (Entry != 0) continue;
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+ if (F->hasSection()) {
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[F->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
+ if (F->hasGC()) {
+ // Same for GC names.
+ unsigned &Entry = GCMap[F->getGC()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+ 0/*TODO*/, Stream);
+ Entry = GCMap.size();
+ }
+ }
+ }
+
+ // Emit abbrev for globals, now that we know # sections and max alignment.
+ unsigned SimpleGVarAbbrev = 0;
+ if (!M->global_empty()) {
+ // Add an abbrev for common globals with no visibility or thread localness.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxGlobalType+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constant.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Linkage.
+ if (MaxAlignment == 0) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else {
+ unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxEncAlignment+1)));
+ }
+ if (SectionMap.empty()) // Section.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(SectionMap.size()+1)));
+ // Don't bother emitting vis + thread local.
+ SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ // Emit the global variable information.
+ SmallVector<unsigned, 64> Vals;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ unsigned AbbrevToUse = 0;
+
+ // GLOBALVAR: [type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
+ Vals.push_back(VE.getTypeID(GV->getType()));
+ Vals.push_back(GV->isConstant());
+ Vals.push_back(GV->isDeclaration() ? 0 :
+ (VE.getValueID(GV->getInitializer()) + 1));
+ Vals.push_back(getEncodedLinkage(GV));
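+ // Alignment is encoded as log2(alignment) + 1; zero means no alignment
+ // was specified.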
+ Vals.push_back(Log2_32(GV->getAlignment())+1);
+ Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+ if (GV->isThreadLocal() ||
+ GV->getVisibility() != GlobalValue::DefaultVisibility ||
+ GV->hasUnnamedAddr()) {
+ Vals.push_back(getEncodedVisibility(GV));
+ Vals.push_back(GV->isThreadLocal());
+ Vals.push_back(GV->hasUnnamedAddr());
+ } else {
+ AbbrevToUse = SimpleGVarAbbrev;
+ }
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ // FUNCTION: [type, callingconv, isproto, paramattr,
+ // linkage, alignment, section, visibility, gc, unnamed_addr]
+ Vals.push_back(VE.getTypeID(F->getType()));
+ Vals.push_back(F->getCallingConv());
+ Vals.push_back(F->isDeclaration());
+ Vals.push_back(getEncodedLinkage(F));
+ Vals.push_back(VE.getAttributeID(F->getAttributes()));
+ Vals.push_back(Log2_32(F->getAlignment())+1);
+ Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+ Vals.push_back(getEncodedVisibility(F));
+ Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+ Vals.push_back(F->hasUnnamedAddr());
+
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the alias information.
+ for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+ AI != E; ++AI) {
+ Vals.push_back(VE.getTypeID(AI->getType()));
+ Vals.push_back(VE.getValueID(AI->getAliasee()));
+ Vals.push_back(getEncodedLinkage(AI));
+ Vals.push_back(getEncodedVisibility(AI));
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+}
+
+static uint64_t GetOptimizationFlags(const Value *V) {
+ uint64_t Flags = 0;
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (OBO->hasNoSignedWrap())
+ Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+ if (OBO->hasNoUnsignedWrap())
+ Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+ } else if (const PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(V)) {
+ if (PEO->isExact())
+ Flags |= 1 << bitc::PEO_EXACT;
+ }
+
+ return Flags;
+}
+
+static void WriteMDNode(const MDNode *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVector<uint64_t, 64> &Record) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i)) {
+ Record.push_back(VE.getTypeID(N->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(N->getOperand(i)));
+ } else {
+ Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext())));
+ Record.push_back(0);
+ }
+ }
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+ bitc::METADATA_NODE;
+ Stream.EmitRecord(MDCode, Record, 0);
+ Record.clear();
+}
+
+static void WriteModuleMetadata(const Module *M,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+ bool StartedMetadataBlock = false;
+ unsigned MDSAbbrev = 0;
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+ if (!N->isFunctionLocal() || !N->getFunction()) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+ } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ // Abbrev for METADATA_STRING.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ MDSAbbrev = Stream.EmitAbbrev(Abbv);
+ StartedMetadataBlock = true;
+ }
+
+ // Code: [strchar x N]
+ Record.append(MDS->begin(), MDS->end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
+ Record.clear();
+ }
+ }
+
+ // Write named metadata.
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+
+ // Write name.
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(NMD->getOperand(i)));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+ Record.clear();
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteFunctionLocalMetadata(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+ const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (const MDNode *N = Vals[i])
+ if (N->isFunctionLocal() && N->getFunction() == &F) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
+static void WriteMetadataAttachment(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata attachments
+ // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+
+ // If no metadata, ignore instruction.
+ if (MDs.empty()) continue;
+
+ Record.push_back(VE.getInstructionID(I));
+
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ Record.push_back(MDs[i].first);
+ Record.push_back(VE.getValueID(MDs[i].second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata kinds
+ // METADATA_KIND - [n x [id, name]]
+ SmallVector<StringRef, 4> Names;
+ M->getMDKindNames(Names);
+
+ if (Names.empty()) return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+ Record.push_back(MDKindID);
+ StringRef KName = Names[MDKindID];
+ Record.append(KName.begin(), KName.end());
+
+ Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream, bool isGlobal) {
+ if (FirstVal == LastVal) return;
+
+ Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+ unsigned AggregateAbbrev = 0;
+ unsigned String8Abbrev = 0;
+ unsigned CString7Abbrev = 0;
+ unsigned CString6Abbrev = 0;
+ // If this is a constant pool for the module, emit module-specific abbrevs.
+ if (isGlobal) {
+ // Abbrev for CST_CODE_AGGREGATE.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+ AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for CST_CODE_STRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ String8Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+ const Type *LastTy = 0;
+ for (unsigned i = FirstVal; i != LastVal; ++i) {
+ const Value *V = Vals[i].first;
+ // If we need to switch types, do so now.
+ if (V->getType() != LastTy) {
+ LastTy = V->getType();
+ Record.push_back(VE.getTypeID(LastTy));
+ Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+ CONSTANTS_SETTYPE_ABBREV);
+ Record.clear();
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Record.push_back(unsigned(IA->hasSideEffects()) |
+ unsigned(IA->isAlignStack()) << 1);
+
+ // Add the asm string.
+ const std::string &AsmStr = IA->getAsmString();
+ Record.push_back(AsmStr.size());
+ for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+ Record.push_back(AsmStr[i]);
+
+ // Add the constraint string.
+ const std::string &ConstraintStr = IA->getConstraintString();
+ Record.push_back(ConstraintStr.size());
+ for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+ Record.push_back(ConstraintStr[i]);
+ Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+ Record.clear();
+ continue;
+ }
+ const Constant *C = cast<Constant>(V);
+ unsigned Code = -1U;
+ unsigned AbbrevToUse = 0;
+ if (C->isNullValue()) {
+ Code = bitc::CST_CODE_NULL;
+ } else if (isa<UndefValue>(C)) {
+ Code = bitc::CST_CODE_UNDEF;
+ } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+ if (IV->getBitWidth() <= 64) {
+ uint64_t V = IV->getSExtValue();
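+ // Fold the sign into the low bit: a non-negative value V is emitted as
+ // V << 1, a negative one as (-V << 1) | 1 (e.g. 5 -> 10, -3 -> 7).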
+ if ((int64_t)V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else { // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NWords = IV->getValue().getActiveWords();
+ const uint64_t *RawWords = IV->getValue().getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ int64_t V = RawWords[i];
+ if (V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Code = bitc::CST_CODE_FLOAT;
+ const Type *Ty = CFP->getType();
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else if (Ty->isX86_FP80Ty()) {
+ // A named APInt is needed to prevent premature destruction of the temporary.
+ // The bits are not in the same order as a normal i80 APInt; compensate.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back((p[1] << 48) | (p[0] >> 16));
+ Record.push_back(p[0] & 0xffffLL);
+ } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back(p[0]);
+ Record.push_back(p[1]);
+ } else {
+ assert (0 && "Unknown FP type!");
+ }
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ const ConstantArray *CA = cast<ConstantArray>(C);
+ // Emit constant strings specially.
+ unsigned NumOps = CA->getNumOperands();
+ // If this is a null-terminated string, use the denser CSTRING encoding.
+ if (CA->getOperand(NumOps-1)->isNullValue()) {
+ Code = bitc::CST_CODE_CSTRING;
+ --NumOps; // Don't encode the null, which isn't allowed by char6.
+ } else {
+ Code = bitc::CST_CODE_STRING;
+ AbbrevToUse = String8Abbrev;
+ }
+ bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+ bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
+ Record.push_back(V);
+ isCStr7 &= (V & 128) == 0;
+ if (isCStrChar6)
+ isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+ }
+
+ if (isCStrChar6)
+ AbbrevToUse = CString6Abbrev;
+ else if (isCStr7)
+ AbbrevToUse = CString7Abbrev;
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
+ isa<ConstantVector>(V)) {
+ Code = bitc::CST_CODE_AGGREGATE;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ AbbrevToUse = AggregateAbbrev;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ default:
+ if (Instruction::isCast(CE->getOpcode())) {
+ Code = bitc::CST_CODE_CE_CAST;
+ Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+ } else {
+ assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_BINOP;
+ Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ uint64_t Flags = GetOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ }
+ break;
+ case Instruction::GetElementPtr:
+ Code = bitc::CST_CODE_CE_GEP;
+ if (cast<GEPOperator>(C)->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ }
+ break;
+ case Instruction::Select:
+ Code = bitc::CST_CODE_CE_SELECT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::CST_CODE_CE_EXTRACTELT;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::CST_CODE_CE_INSERTELT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ // If the return type and argument types are the same, this is a
+ // standard shufflevector instruction. If the types are different,
+ // then the shuffle is widening or truncating the input vectors, and
+ // the argument type must also be encoded.
+ if (C->getType() == C->getOperand(0)->getType()) {
+ Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+ } else {
+ Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ }
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ Code = bitc::CST_CODE_CE_CMP;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(CE->getPredicate());
+ break;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Code = bitc::CST_CODE_BLOCKADDRESS;
+ Record.push_back(VE.getTypeID(BA->getFunction()->getType()));
+ Record.push_back(VE.getValueID(BA->getFunction()));
+ Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+ } else {
+#ifndef NDEBUG
+ C->dump();
+#endif
+ llvm_unreachable("Unknown constant!");
+ }
+ Stream.EmitRecord(Code, Record, AbbrevToUse);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+ // Find the first constant to emit, which is the first non-globalvalue value.
+ // We know globalvalues have been emitted by WriteModuleInfo.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ if (!isa<GlobalValue>(Vals[i].first)) {
+ WriteConstants(i, Vals.size(), VE, Stream, true);
+ return;
+ }
+ }
+}
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is greater than or
+/// equal to the instruction ID, it is a forward reference, and the type ID is
+/// emitted as well.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(ValID);
+ if (ValID >= InstID) {
+ Vals.push_back(VE.getTypeID(V->getType()));
+ return true;
+ }
+ return false;
+}
+
+/// WriteInstruction - Emit an instruction to the specified stream.
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+ ValueEnumerator &VE, BitstreamWriter &Stream,
+ SmallVector<unsigned, 64> &Vals) {
+ unsigned Code = 0;
+ unsigned AbbrevToUse = 0;
+ VE.setInstructionID(&I);
+ switch (I.getOpcode()) {
+ default:
+ if (Instruction::isCast(I.getOpcode())) {
+ Code = bitc::FUNC_CODE_INST_CAST;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+ } else {
+ assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+ Code = bitc::FUNC_CODE_INST_BINOP;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+ uint64_t Flags = GetOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ Code = bitc::FUNC_CODE_INST_GEP;
+ if (cast<GEPOperator>(&I)->isInBounds())
+ Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractValue: {
+ Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::InsertValue: {
+ Code = bitc::FUNC_CODE_INST_INSERTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::FUNC_CODE_INST_VSELECT;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::FUNC_CODE_INST_INSERTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(cast<CmpInst>(I).getPredicate());
+ break;
+
+ case Instruction::Ret:
+ {
+ Code = bitc::FUNC_CODE_INST_RET;
+ unsigned NumOperands = I.getNumOperands();
+ if (NumOperands == 0)
+ AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+ else if (NumOperands == 1) {
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+ } else {
+ for (unsigned i = 0, e = NumOperands; i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ }
+ }
+ break;
+ case Instruction::Br:
+ {
+ Code = bitc::FUNC_CODE_INST_BR;
+ BranchInst &II = cast<BranchInst>(I);
+ Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+ if (II.isConditional()) {
+ Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+ Vals.push_back(VE.getValueID(II.getCondition()));
+ }
+ }
+ break;
+ case Instruction::Switch:
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+ case Instruction::IndirectBr:
+ Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
+ case Instruction::Invoke: {
+ const InvokeInst *II = cast<InvokeInst>(&I);
+ const Value *Callee(II->getCalledValue());
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Code = bitc::FUNC_CODE_INST_INVOKE;
+
+ Vals.push_back(VE.getAttributeID(II->getAttributes()));
+ Vals.push_back(II->getCallingConv());
+ Vals.push_back(VE.getValueID(II->getNormalDest()));
+ Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ PushValueAndType(Callee, InstID, Vals, VE);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
+ i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
+ }
+ break;
+ }
+ case Instruction::Unwind:
+ Code = bitc::FUNC_CODE_INST_UNWIND;
+ break;
+ case Instruction::Unreachable:
+ Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+ AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+ break;
+
+ case Instruction::PHI: {
+ const PHINode &PN = cast<PHINode>(I);
+ Code = bitc::FUNC_CODE_INST_PHI;
+ Vals.push_back(VE.getTypeID(PN.getType()));
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
+ Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ }
+ break;
+ }
+
+ case Instruction::Alloca:
+ Code = bitc::FUNC_CODE_INST_ALLOCA;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+ break;
+
+ case Instruction::Load:
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
+ AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+
+ Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+ Vals.push_back(cast<LoadInst>(I).isVolatile());
+ break;
+ case Instruction::Store:
+ Code = bitc::FUNC_CODE_INST_STORE;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
+ Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+ Vals.push_back(cast<StoreInst>(I).isVolatile());
+ break;
+ case Instruction::Call: {
+ const CallInst &CI = cast<CallInst>(I);
+ const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ Code = bitc::FUNC_CODE_INST_CALL;
+
+ Vals.push_back(VE.getAttributeID(CI.getAttributes()));
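+ // Pack the calling convention together with the tail-call flag in bit 0.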
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+ PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
+ i != e; ++i)
+ PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs
+ }
+ break;
+ }
+ case Instruction::VAArg:
+ Code = bitc::FUNC_CODE_INST_VAARG;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // valist.
+ Vals.push_back(VE.getTypeID(I.getType())); // restype.
+ break;
+ }
+
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+ Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ if (VST.empty()) return;
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ // FIXME: Set up the abbrev, we know how many values there are!
+ // FIXME: We know if the type names can use 7-bit ascii.
+ SmallVector<unsigned, 64> NameVals;
+
+ for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+ SI != SE; ++SI) {
+
+ const ValueName &Name = *SI;
+
+ // Figure out the encoding to use for the name.
+ bool is7Bit = true;
+ bool isChar6 = true;
+ for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+ C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128) {
+ is7Bit = false;
+ break; // don't bother scanning the rest.
+ }
+ }
+
+ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+ // VST_ENTRY: [valueid, namechar x N]
+ // VST_BBENTRY: [bbid, namechar x N]
+ unsigned Code;
+ if (isa<BasicBlock>(SI->getValue())) {
+ Code = bitc::VST_CODE_BBENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else {
+ Code = bitc::VST_CODE_ENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_ENTRY_6_ABBREV;
+ else if (is7Bit)
+ AbbrevToUse = VST_ENTRY_7_ABBREV;
+ }
+
+ NameVals.push_back(VE.getValueID(SI->getValue()));
+ for (const char *P = Name.getKeyData(),
+ *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+ NameVals.push_back((unsigned char)*P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+ VE.incorporateFunction(F);
+
+ SmallVector<unsigned, 64> Vals;
+
+ // Emit the number of basic blocks, so the reader can create them ahead of
+ // time.
+ Vals.push_back(VE.getBasicBlocks().size());
+ Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+ Vals.clear();
+
+ // If there are function-local constants, emit them now.
+ unsigned CstStart, CstEnd;
+ VE.getFunctionConstantRange(CstStart, CstEnd);
+ WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+ // If there is function-local metadata, emit it now.
+ WriteFunctionLocalMetadata(F, VE, Stream);
+
+ // Keep a running idea of what the instruction ID is.
+ unsigned InstID = CstEnd;
+
+ bool NeedsMetadataAttachment = false;
+
+ DebugLoc LastDL;
+
+ // Finally, emit all the instructions, in order.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ WriteInstruction(*I, InstID, VE, Stream, Vals);
+
+ if (!I->getType()->isVoidTy())
+ ++InstID;
+
+ // If the instruction has metadata, write a metadata attachment later.
+ NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+ // If the instruction has a debug location, emit it.
+ DebugLoc DL = I->getDebugLoc();
+ if (DL.isUnknown()) {
+ // Nothing to do.
+ } else if (DL == LastDL) {
+ // Just repeat the same debug loc as last time.
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+ } else {
+ MDNode *Scope, *IA;
+ DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
+
+ Vals.push_back(DL.getLine());
+ Vals.push_back(DL.getCol());
+ Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
+ Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
+
+ LastDL = DL;
+ }
+ }
+
+ // Emit names for all the instructions etc.
+ WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+ if (NeedsMetadataAttachment)
+ WriteMetadataAttachment(F, VE, Stream);
+ VE.purgeFunction();
+ Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ // We only want to emit block info records for blocks that have multiple
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
+ // blocks can define their abbrevs inline.
+ Stream.EnterBlockInfoBlock(2);
+
+ { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_8_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // 7-bit fixed width VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_7_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_BBENTRY_6_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+
+
+ { // SETTYPE abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_SETTYPE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INTEGER abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_INTEGER_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // CE_CAST abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_CE_CAST_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // NULL abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_NULL_Abbrev)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ // FIXME: This should only use space for first class types!
+
+ { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_LOAD_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_CAST abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_CAST_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+
+ Stream.ExitBlock();
+}
+
+
+/// WriteModule - Emit the specified module to the bitstream.
+static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ // Emit the version number if it is non-zero.
+ if (CurVersion) {
+ SmallVector<unsigned, 1> Vals;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+ }
+
+ // Analyze the module, enumerating globals, functions, etc.
+ ValueEnumerator VE(M);
+
+ // Emit blockinfo, which defines the standard abbreviations etc.
+ WriteBlockInfo(VE, Stream);
+
+ // Emit information about parameter attributes.
+ WriteAttributeTable(VE, Stream);
+
+ // Emit information describing all of the types in the module.
+ WriteTypeTable(VE, Stream);
+
+ // Emit top-level description of module, including target triple, inline asm,
+ // descriptors for global variables, and function prototype info.
+ WriteModuleInfo(M, VE, Stream);
+
+ // Emit constants.
+ WriteModuleConstants(VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadata(M, VE, Stream);
+
+ // Emit function bodies.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+ if (!F->isDeclaration())
+ WriteFunction(*F, VE, Stream);
+
+ // Emit metadata.
+ WriteModuleMetadataStore(M, Stream);
+
+ // Emit names for globals/functions etc.
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+ Stream.ExitBlock();
+}
+
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver. To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+/// uint32_t Magic; // 0x0B17C0DE
+/// uint32_t Version; // Version, currently always 0.
+/// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+/// uint32_t BitcodeSize; // Size of traditional bitcode file.
+/// uint32_t CPUType; // CPU specifier.
+/// ... potentially more later ...
+/// };
+enum {
+ DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+ DarwinBCHeaderSize = 5*4
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
+ unsigned CPUType = ~0U;
+
+ // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
+ // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic
+ // number from /usr/include/mach/machine.h. It is ok to reproduce the
+ // specific constants here because they are implicitly part of the Darwin ABI.
+ enum {
+ DARWIN_CPU_ARCH_ABI64 = 0x01000000,
+ DARWIN_CPU_TYPE_X86 = 7,
+ DARWIN_CPU_TYPE_ARM = 12,
+ DARWIN_CPU_TYPE_POWERPC = 18
+ };
+
+ Triple::ArchType Arch = TT.getArch();
+ if (Arch == Triple::x86_64)
+ CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::x86)
+ CPUType = DARWIN_CPU_TYPE_X86;
+ else if (Arch == Triple::ppc)
+ CPUType = DARWIN_CPU_TYPE_POWERPC;
+ else if (Arch == Triple::ppc64)
+ CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+ else if (Arch == Triple::arm || Arch == Triple::thumb)
+ CPUType = DARWIN_CPU_TYPE_ARM;
+
+ // Traditional Bitcode starts after header.
+ unsigned BCOffset = DarwinBCHeaderSize;
+
+ Stream.Emit(0x0B17C0DE, 32);
+ Stream.Emit(0 , 32); // Version.
+ Stream.Emit(BCOffset , 32);
+ Stream.Emit(0 , 32); // Filled in later.
+ Stream.Emit(CPUType , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+ // Update the size field in the header.
+ Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+
+ // If the file is not a multiple of 16 bytes, insert dummy padding.
+ while (BufferSize & 15) {
+ Stream.Emit(0, 8);
+ ++BufferSize;
+ }
+}
+
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+ std::vector<unsigned char> Buffer;
+ BitstreamWriter Stream(Buffer);
+
+ Buffer.reserve(256*1024);
+
+ WriteBitcodeToStream(M, Stream);
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
+}
+
+/// WriteBitcodeToStream - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
+ // If this is darwin or another generic macho target, emit a file header and
+ // trailer if needed.
+ Triple TT(M->getTargetTriple());
+ if (TT.isOSDarwin())
+ EmitDarwinBCHeader(Stream, TT);
+
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+
+ if (TT.isOSDarwin())
+ EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+}
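For context, a minimal sketch of how a client might drive the writer above, assuming the LLVM C++ API of this vintage (LLVMContext, Module, raw_fd_ostream). The file name and the empty module are illustrative only; this is not part of the imported sources.

    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Context;
      Module M("example", Context);                 // empty module, illustrative
      std::string ErrInfo;
      raw_fd_ostream OS("example.bc", ErrInfo, raw_fd_ostream::F_Binary);
      if (!ErrInfo.empty())
        return 1;                                   // could not open the output
      WriteBitcodeToFile(&M, OS);                   // 'B','C',0x0,0xC,0xE,0xD + MODULE_BLOCK
      return 0;
    }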
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 000000000000..91e115cba6cc
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,41 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class WriteBitcodePass : public ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit WriteBitcodePass(raw_ostream &o)
+ : ModulePass(ID), OS(o) {}
+
+ const char *getPassName() const { return "Bitcode Writer"; }
+
+ bool runOnModule(Module &M) {
+ WriteBitcodeToFile(&M, OS);
+ return false;
+ }
+ };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+ return new WriteBitcodePass(Str);
+}
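Similarly, a hedged sketch of using the pass form with the legacy PassManager of this era; the helper function name is mine, and the module and output stream are assumed to be set up elsewhere.

    #include "llvm/PassManager.h"
    #include "llvm/Module.h"
    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Append the bitcode writer to a pipeline so the module is serialized
    // after all other passes have run.
    static void writeWithPassManager(Module &M, raw_ostream &OS) {
      PassManager PM;
      PM.add(createBitcodeWriterPass(OS));   // the pass defined above
      PM.run(M);                             // runOnModule() calls WriteBitcodeToFile
    }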
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 000000000000..b68bf92d51b2
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,494 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
+ return V.first->getType()->isIntegerTy();
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+ // Enumerate the global variables.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ EnumerateValue(I);
+
+ // Enumerate the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ EnumerateValue(I);
+ EnumerateAttributes(cast<Function>(I)->getAttributes());
+ }
+
+ // Enumerate the aliases.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ // Remember the cutoff between global values and other constants.
+ unsigned FirstConstant = Values.size();
+
+ // Enumerate the global variable initializers.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ EnumerateValue(I->getInitializer());
+
+ // Enumerate the aliasees.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I->getAliasee());
+
+ // Insert constants and metadata that are named at module level into the slot
+ // pool so that the module symbol table can refer to them...
+ EnumerateValueSymbolTable(M->getValueSymbolTable());
+ EnumerateNamedMetadata(M);
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+
+ // Enumerate types used by function bodies and argument lists.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ EnumerateType(I->getType());
+
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // These will get enumerated during function-incorporation.
+ continue;
+ EnumerateOperandType(*OI);
+ }
+ EnumerateType(I->getType());
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ EnumerateAttributes(CI->getAttributes());
+ else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+ EnumerateAttributes(II->getAttributes());
+
+ // Enumerate metadata attached with this instruction.
+ MDs.clear();
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+ EnumerateMetadata(MDs[i].second);
+
+ if (!I->getDebugLoc().isUnknown()) {
+ MDNode *Scope, *IA;
+ I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
+ if (Scope) EnumerateMetadata(Scope);
+ if (IA) EnumerateMetadata(IA);
+ }
+ }
+ }
+
+ // Optimize constant ordering.
+ OptimizeConstants(FirstConstant, Values.size());
+}
+
+
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+ InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+ assert(I != InstructionMap.end() && "Instruction is not mapped!");
+ return I->second;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+ InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+ if (isa<MDNode>(V) || isa<MDString>(V)) {
+ ValueMapType::const_iterator I = MDValueMap.find(V);
+ assert(I != MDValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+ }
+
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+}
+
+// Optimize constant ordering.
+namespace {
+ struct CstSortPredicate {
+ ValueEnumerator &VE;
+ explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
+ bool operator()(const std::pair<const Value*, unsigned> &LHS,
+ const std::pair<const Value*, unsigned> &RHS) {
+ // Sort by plane.
+ if (LHS.first->getType() != RHS.first->getType())
+ return VE.getTypeID(LHS.first->getType()) <
+ VE.getTypeID(RHS.first->getType());
+ // Then by frequency.
+ return LHS.second > RHS.second;
+ }
+ };
+}
+
+/// OptimizeConstants - Reorder constant pool for denser encoding.
+void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
+ if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+
+ CstSortPredicate P(*this);
+ std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
+
+ // Ensure that integer constants are at the start of the constant pool. This
+ // is important so that GEP structure indices come before gep constant exprs.
+ std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
+ isIntegerValue);
+
+ // Rebuild the modified portion of ValueMap.
+ for (; CstStart != CstEnd; ++CstStart)
+ ValueMap[Values[CstStart].first] = CstStart+1;
+}
+
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ VI != VE; ++VI)
+ EnumerateValue(VI->getValue());
+}
+
+/// EnumerateNamedMetadata - Insert all of the values referenced by
+/// named metadata in the specified module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ EnumerateNamedMDNode(I);
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(MD->getOperand(i));
+}
+
+/// EnumerateMDNodeOperands - Enumerate all non-function-local values
+/// and types referenced by the given MDNode.
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (isa<MDNode>(V) || isa<MDString>(V))
+ EnumerateMetadata(V);
+ else if (!isa<Instruction>(V) && !isa<Argument>(V))
+ EnumerateValue(V);
+ } else
+ EnumerateType(Type::getVoidTy(N->getContext()));
+ }
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+ assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
+
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ const MDNode *N = dyn_cast<MDNode>(MD);
+
+ // In the module-level pass, skip function-local nodes themselves, but
+ // do walk their operands.
+ if (N && N->isFunctionLocal() && N->getFunction()) {
+ EnumerateMDNodeOperands(N);
+ return;
+ }
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[MD];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+
+ // Enumerate all non-function-local operands.
+ if (N)
+ EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+ assert(N->isFunctionLocal() && N->getFunction() &&
+ "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
+
+ // Enumerate the type of this value.
+ EnumerateType(N->getType());
+
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[N];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+ MDValues.push_back(std::make_pair(N, 1U));
+ MDValueID = MDValues.size();
+
+ // To incorporate function-local information, visit all function-local
+ // MDNodes and all function-local values they reference.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *O = dyn_cast<MDNode>(V)) {
+ if (O->isFunctionLocal() && O->getFunction())
+ EnumerateFunctionLocalMetadata(O);
+ } else if (isa<Instruction>(V) || isa<Argument>(V))
+ EnumerateValue(V);
+ }
+
+ // Also, collect all function-local MDNodes for easy access.
+ FunctionLocalMDs.push_back(N);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+ assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+ assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+ "EnumerateValue doesn't handle Metadata!");
+
+ // Check to see if it's already in!
+ unsigned &ValueID = ValueMap[V];
+ if (ValueID) {
+ // Increment use count.
+ Values[ValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<GlobalValue>(C)) {
+ // Initializers for globals are handled explicitly elsewhere.
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ // Do not enumerate the initializers for an array of simple characters.
+ // The initializers just pollute the value table, and we emit the strings
+ // specially.
+ } else if (C->getNumOperands()) {
+ // If a constant has operands, enumerate them. This makes sure that if a
+ // constant has uses (for example, an array of const ints), they are
+ // inserted as well.
+
+ // We prefer to enumerate them with values before we enumerate the user
+ // itself. This makes it more likely that we can avoid forward references
+ // in the reader. We know that there can be no cycles in the constants
+ // graph that don't go through a global variable.
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+ I != E; ++I)
+ if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+ EnumerateValue(*I);
+
+ // Finally, add the value. Doing this could leave the ValueID reference
+ // dangling, so don't reuse it.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ }
+ }
+
+ // Add the value.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueID = Values.size();
+}
+
+
+void ValueEnumerator::EnumerateType(const Type *Ty) {
+ unsigned *TypeID = &TypeMap[Ty];
+
+ // We've already seen this type.
+ if (*TypeID)
+ return;
+
+ // If it is a non-anonymous struct, mark the type as being visited so that we
+ // don't recursively visit it. This is safe because we allow forward
+ // references of these in the bitcode reader.
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isAnonymous())
+ *TypeID = ~0U;
+
+ // Enumerate all of the subtypes before we enumerate this type. This ensures
+ // that the type will be enumerated in an order that can be directly built.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I);
+
+ // Refresh the TypeID pointer in case the table rehashed.
+ TypeID = &TypeMap[Ty];
+
+ // Check to see if we got the pointer another way. This can happen when
+ // enumerating recursive types that hit the base case deeper than they start.
+ //
+ // If this is actually a struct that we are treating as forward ref'able,
+ // then emit the definition now that all of its contents are available.
+ if (*TypeID && *TypeID != ~0U)
+ return;
+
+ // Add this type now that its contents are all happily enumerated.
+ Types.push_back(Ty);
+
+ *TypeID = Types.size();
+}
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ // If this constant is already enumerated, ignore it, we know its type must
+ // be enumerated.
+ if (ValueMap.count(V)) return;
+
+ // This constant may have operands, make sure to enumerate the types in
+ // them.
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ const Value *Op = C->getOperand(i);
+
+ // Don't enumerate basic blocks here, this happens as operands to
+ // blockaddress.
+ if (isa<BasicBlock>(Op)) continue;
+
+ EnumerateOperandType(Op);
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *Elem = N->getOperand(i))
+ EnumerateOperandType(Elem);
+ }
+ } else if (isa<MDString>(V) || isa<MDNode>(V))
+ EnumerateMetadata(V);
+}
+
+void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+ if (PAL.isEmpty()) return; // null is always 0.
+ // Do a lookup.
+ unsigned &Entry = AttributeMap[PAL.getRawPointer()];
+ if (Entry == 0) {
+ // Never saw this before, add it.
+ Attributes.push_back(PAL);
+ Entry = Attributes.size();
+ }
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
+ NumModuleValues = Values.size();
+ NumModuleMDValues = MDValues.size();
+
+ // Adding function arguments to the value table.
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ FirstFuncConstantID = Values.size();
+
+ // Add all function-level constants to the value table.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ EnumerateValue(*OI);
+ }
+ BasicBlocks.push_back(BB);
+ ValueMap[BB] = BasicBlocks.size();
+ }
+
+ // Optimize the constant layout.
+ OptimizeConstants(FirstFuncConstantID, Values.size());
+
+ // Add the function's parameter attributes so they are available for use in
+ // the function's instructions.
+ EnumerateAttributes(F.getAttributes());
+
+ FirstInstID = Values.size();
+
+ SmallVector<MDNode *, 8> FnLocalMDVector;
+ // Add all of the instructions.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal() && MD->getFunction())
+ // Enumerate metadata after the instructions they might refer to.
+ FnLocalMDVector.push_back(MD);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ MDNode *N = MDs[i].second;
+ if (N->isFunctionLocal() && N->getFunction())
+ FnLocalMDVector.push_back(N);
+ }
+
+ if (!I->getType()->isVoidTy())
+ EnumerateValue(I);
+ }
+ }
+
+ // Add all of the function-local metadata.
+ for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
+ EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
+}
+
+void ValueEnumerator::purgeFunction() {
+ /// Remove purged values from the ValueMap.
+ for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+ ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
+ MDValueMap.erase(MDValues[i].first);
+ for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+ ValueMap.erase(BasicBlocks[i]);
+
+ Values.resize(NumModuleValues);
+ MDValues.resize(NumModuleMDValues);
+ BasicBlocks.clear();
+ FunctionLocalMDs.clear();
+}
+
+static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
+ DenseMap<const BasicBlock*, unsigned> &IDMap) {
+ unsigned Counter = 0;
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ IDMap[BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
+ unsigned &Idx = GlobalBasicBlockIDs[BB];
+ if (Idx != 0)
+ return Idx-1;
+
+ IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+ return getGlobalBasicBlockID(BB);
+}
+
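One detail worth calling out: ValueMap, MDValueMap, and TypeMap above all store IDs biased by one, so that a default-constructed entry of 0 means "not yet enumerated", and the getters subtract the bias back out. A reduced sketch of that pattern using standard containers (the enumerate() helper is mine, not part of the imported sources):

    #include <map>
    #include <vector>

    static std::map<const void*, unsigned> ValueMap;   // value -> ID+1
    static std::vector<const void*> Values;            // ID -> value

    static unsigned enumerate(const void *V) {
      unsigned &ID = ValueMap[V];   // first access default-constructs to 0
      if (ID)
        return ID - 1;              // already enumerated; unbias the stored ID
      Values.push_back(V);
      ID = Values.size();           // store as index+1 so 0 still means "absent"
      return ID - 1;
    }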
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
new file mode 100644
index 000000000000..6617b60deb26
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -0,0 +1,153 @@
+//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types unique IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUE_ENUMERATOR_H
+#define VALUE_ENUMERATOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Attributes.h"
+#include <vector>
+
+namespace llvm {
+
+class Type;
+class Value;
+class Instruction;
+class BasicBlock;
+class Function;
+class Module;
+class MDNode;
+class NamedMDNode;
+class AttrListPtr;
+class ValueSymbolTable;
+class MDSymbolTable;
+
+class ValueEnumerator {
+public:
+ typedef std::vector<const Type*> TypeList;
+
+ // For each value, we remember its Value* and occurrence frequency.
+ typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+ typedef DenseMap<const Type*, unsigned> TypeMapType;
+ TypeMapType TypeMap;
+ TypeList Types;
+
+ typedef DenseMap<const Value*, unsigned> ValueMapType;
+ ValueMapType ValueMap;
+ ValueList Values;
+ ValueList MDValues;
+ SmallVector<const MDNode *, 8> FunctionLocalMDs;
+ ValueMapType MDValueMap;
+
+ typedef DenseMap<void*, unsigned> AttributeMapType;
+ AttributeMapType AttributeMap;
+ std::vector<AttrListPtr> Attributes;
+
+ /// GlobalBasicBlockIDs - This map memoizes the basic block IDs referenced by
+ /// the "getGlobalBasicBlockID" method.
+ mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
+
+ typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
+ InstructionMapType InstructionMap;
+ unsigned InstructionCount;
+
+ /// BasicBlocks - This contains all the basic blocks for the currently
+ /// incorporated function. Their reverse mapping is stored in ValueMap.
+ std::vector<const BasicBlock*> BasicBlocks;
+
+ /// When a function is incorporated, this is the size of the Values list
+ /// before incorporation.
+ unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDValues list
+ /// before incorporation.
+ unsigned NumModuleMDValues;
+
+ unsigned FirstFuncConstantID;
+ unsigned FirstInstID;
+
+ ValueEnumerator(const ValueEnumerator &); // DO NOT IMPLEMENT
+ void operator=(const ValueEnumerator &); // DO NOT IMPLEMENT
+public:
+ ValueEnumerator(const Module *M);
+
+ unsigned getValueID(const Value *V) const;
+
+ unsigned getTypeID(const Type *T) const {
+ TypeMapType::const_iterator I = TypeMap.find(T);
+ assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+ return I->second-1;
+ }
+
+ unsigned getInstructionID(const Instruction *I) const;
+ void setInstructionID(const Instruction *I);
+
+ unsigned getAttributeID(const AttrListPtr &PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+ assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ /// getFunctionConstantRange - Return the range of values that corresponds to
+ /// function-local constants.
+ void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+ Start = FirstFuncConstantID;
+ End = FirstInstID;
+ }
+
+ const ValueList &getValues() const { return Values; }
+ const ValueList &getMDValues() const { return MDValues; }
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ return FunctionLocalMDs;
+ }
+ const TypeList &getTypes() const { return Types; }
+ const std::vector<const BasicBlock*> &getBasicBlocks() const {
+ return BasicBlocks;
+ }
+ const std::vector<AttrListPtr> &getAttributes() const {
+ return Attributes;
+ }
+
+ /// getGlobalBasicBlockID - This returns the function-specific ID for the
+ /// specified basic block. This is relatively expensive information, so it
+ /// should only be used by rare constructs such as address-of-label.
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+ /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+ /// use these two methods to get its data into the ValueEnumerator!
+ ///
+ void incorporateFunction(const Function &F);
+ void purgeFunction();
+
+private:
+ void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+
+ void EnumerateMDNodeOperands(const MDNode *N);
+ void EnumerateMetadata(const Value *MD);
+ void EnumerateFunctionLocalMetadata(const MDNode *N);
+ void EnumerateNamedMDNode(const NamedMDNode *NMD);
+ void EnumerateValue(const Value *V);
+ void EnumerateType(const Type *T);
+ void EnumerateOperandType(const Value *V);
+ void EnumerateAttributes(const AttrListPtr &PAL);
+
+ void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module *M);
+};
+
+} // End llvm namespace
+
+#endif
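As the incorporateFunction/purgeFunction comment above suggests, per-function data is brought in and dropped by bracketing each body; a hedged sketch of that calling pattern follows, mirroring the WriteFunction loop in the module writer. The walkFunctionIDs helper is mine, record emission is omitted, and ValueEnumerator.h is a private header of the writer directory.

    #include "ValueEnumerator.h"
    #include "llvm/Module.h"
    #include "llvm/Type.h"
    using namespace llvm;

    static void walkFunctionIDs(const Module &M) {
      ValueEnumerator VE(&M);                       // module-level enumeration
      for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration())
          continue;
        VE.incorporateFunction(*F);                 // args, constants, instructions
        for (Function::const_iterator BB = F->begin(); BB != F->end(); ++BB)
          for (BasicBlock::const_iterator I = BB->begin(); I != BB->end(); ++I)
            if (!I->getType()->isVoidTy())
              (void)VE.getValueID(&*I);             // IDs are stable until purgeFunction()
        VE.purgeFunction();                         // drop the function-local state
      }
    }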
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 000000000000..25842a7876a2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,962 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
+
+
+
+AggressiveAntiDepBreaker::
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ State(NULL) {
+ /* Collect a bitset of all registers that are only broken if they
+ are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(State == NULL);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias) {
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = NULL;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
+ MachineOperand& MO)
+{
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = NULL;
+ if (MO.isDef())
+ Op = MI->findRegisterUseOperand(Reg, true);
+ else
+ Op = MI->findRegisterDefOperand(Reg);
+
+ return((Op != NULL) && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
+ std::set<unsigned>& PassthruRegs) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ PassthruRegs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ PassthruRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ unsigned Reg = P->getReg();
+ if (RegSet.count(Reg) == 0) {
+ Edges.push_back(&*P);
+ RegSet.insert(Reg);
+ }
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ }
+ // Repeat for subregisters.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ DefIndices[AliasReg] = Count;
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // If MI's uses have a special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are
+ // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // It wasn't previously live but now it is; this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = NULL;
+ if (i < MI->getDesc().getNumOperands())
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI->isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = State->GetRegRefs().equal_range(Reg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
+ const TargetRegisterClass *RC = Q->second.RC;
+ if (RC == NULL) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << RC->getName());
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector BV = GetRenameRegisters(Reg);
+ RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+
+ DEBUG(dbgs() << " ::");
+ DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ if (RenameOrder.count(SuperRC) == 0)
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall-out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass, the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI->dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI->isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ const SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
+ if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator>
+ Range = RegRefs.equal_range(CurrReg);
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second.Operand->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
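
The reference-rewriting loop above walks every recorded use and def of CurrReg through the RegRefs multimap before retiring the old register. A minimal standalone sketch of that equal_range pattern follows, using hypothetical stand-in types (Operand, Reference, renameAll) rather than the real MachineOperand/RegisterReference structures:

#include <cstdio>
#include <map>
#include <utility>

// Hypothetical stand-ins for MachineOperand / RegisterReference; the real
// types carry far more state than a single register field.
struct Operand { unsigned Reg; };
struct Reference { Operand *Op; };

typedef std::multimap<unsigned, Reference> RefMap;

// Retarget every recorded reference of OldReg to NewReg, the same pattern
// the loop above applies to RegRefs.equal_range(CurrReg).
static unsigned renameAll(RefMap &RegRefs, unsigned OldReg, unsigned NewReg) {
  unsigned Changed = 0;
  std::pair<RefMap::iterator, RefMap::iterator> Range =
      RegRefs.equal_range(OldReg);
  for (RefMap::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) {
    Q->second.Op->Reg = NewReg;          // corresponds to setReg(NewReg)
    ++Changed;
  }
  RegRefs.erase(OldReg);                 // drop the now-stale entries
  return Changed;
}

int main() {
  Operand A = { 5 }, B = { 5 };
  Reference RA = { &A }, RB = { &B };
  RefMap RegRefs;
  RegRefs.insert(std::make_pair(5u, RA));
  RegRefs.insert(std::make_pair(5u, RB));
  unsigned N = renameAll(RegRefs, 5, 7);
  std::printf("rewrote %u refs: A=%u B=%u\n", N, A.Reg, B.Reg); // 2 refs, both 7
  return 0;
}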
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 000000000000..706778485429
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+
+ /// Class AggressiveAntiDepState
+ /// Contains all the state necessary for anti-dep breaking.
+ class AggressiveAntiDepState {
+ public:
+ /// RegisterReference - Information about a register reference
+ /// within a live range.
+ typedef struct {
+ /// Operand - The register's operand.
+ MachineOperand *Operand;
+ /// RC - The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// GroupNodes - Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-dependency
+ /// breaking.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// GetKillIndices - Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// GetDefIndices - Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// GetRegRefs - Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // GetGroup - Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // GetGroupRegs - Return a vector of the registers belonging to a
+ // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // UnionGroups - Union Reg1's and Reg2's groups to form a new
+ // group. Return the index of the GroupNode representing the
+ // group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // LeaveGroup - Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// IsLive - Return true if Reg is live
+ bool IsLive(unsigned Reg);
+ };
+
+
+ /// Class AggressiveAntiDepBreaker
+ class AggressiveAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// State - The state used to identify and rename anti-dependence
+ /// registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+ /// IsImplicitDefUse - Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+ /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header =NULL, const char *footer =NULL);
+
+ void PrescanInstruction(MachineInstr *MI, unsigned Count,
+ std::set<unsigned>& PassthruRegs);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
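
The GroupNodes/GroupNodeIndices members described above form a small disjoint-set (union-find) structure over register numbers. A hedged, self-contained sketch of that idea follows; RegGroups and its method names are illustrative and are not part of this header:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Minimal disjoint-set keyed by register number, in the spirit of
// GroupNodes/GroupNodeIndices. Group 0 stands for the "ineligible"
// registers, and unioning with it always yields group 0.
class RegGroups {
  std::vector<unsigned> Nodes;   // parent links; Nodes[N] == N means root
  std::vector<unsigned> Index;   // register -> some node in its group
public:
  explicit RegGroups(unsigned NumRegs) : Index(NumRegs, 0) {
    Nodes.push_back(0);          // node 0 is the root of group 0
  }

  unsigned find(unsigned Reg) {              // cf. GetGroup
    unsigned N = Index[Reg];
    while (Nodes[N] != N)
      N = Nodes[N];
    Index[Reg] = N;                          // path shortening
    return N;
  }

  unsigned unite(unsigned R1, unsigned R2) { // cf. UnionGroups
    unsigned G1 = find(R1), G2 = find(R2);
    if (G2 == 0)
      std::swap(G1, G2);                     // group 0 absorbs the other
    Nodes[G2] = G1;
    return G1;
  }

  unsigned leave(unsigned Reg) {             // cf. LeaveGroup
    unsigned N = (unsigned)Nodes.size();
    Nodes.push_back(N);                      // fresh node, its own root
    Index[Reg] = N;
    return N;
  }
};

int main() {
  RegGroups G(8);
  G.leave(3);
  G.leave(4);
  unsigned Joined = G.unite(3, 4);
  assert(G.find(3) == Joined && G.find(4) == Joined);
  std::printf("regs 3 and 4 now share group %u\n", Joined);
  return 0;
}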
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..1005f102bea6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,79 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegisterClassInfo.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo)
+ : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
+ const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+ std::pair<unsigned, unsigned> HintPair =
+ VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+ // HintPair.second is a register, phys or virt.
+ Hint = HintPair.second;
+
+ // Translate to physreg, or 0 if not assigned yet.
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = VRM.getPhys(Hint);
+
+ // The first hint pair component indicates a target-specific hint.
+ if (HintPair.first) {
+ const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
+ // The remaining allocation order may depend on the hint.
+ ArrayRef<unsigned> Order =
+ TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+ VRM.getMachineFunction());
+ if (Order.empty())
+ return;
+
+ // Copy the allocation order with reserved registers removed.
+ OwnedBegin = true;
+ unsigned *P = new unsigned[Order.size()];
+ Begin = P;
+ for (unsigned i = 0; i != Order.size(); ++i)
+ if (!RCI.isReserved(Order[i]))
+ *P++ = Order[i];
+ End = P;
+
+ // Target-dependent hints require resolution.
+ Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
+ VRM.getMachineFunction());
+ } else {
+ // If there is no hint or just a normal hint, use the cached allocation
+ // order from RegisterClassInfo.
+ ArrayRef<unsigned> O = RCI.getOrder(RC);
+ Begin = O.begin();
+ End = O.end();
+ }
+
+ // The hint must be a valid physreg for allocation.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || RCI.isReserved(Hint)))
+ Hint = 0;
+}
+
+AllocationOrder::~AllocationOrder() {
+ if (OwnedBegin)
+ delete [] Begin;
+}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..d1e48a1f2e96
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,73 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+
+class AllocationOrder {
+ const unsigned *Begin;
+ const unsigned *End;
+ const unsigned *Pos;
+ const RegisterClassInfo &RCI;
+ unsigned Hint;
+ bool OwnedBegin;
+public:
+
+ /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about allocation orders and
+ /// reserved registers for the current function.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo);
+
+ ~AllocationOrder();
+
+ /// next - Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next again after it returned 0.
+ /// It will keep returning 0 until rewind() is called.
+ unsigned next() {
+ // First take the hint.
+ if (!Pos) {
+ Pos = Begin;
+ if (Hint)
+ return Hint;
+ }
+ // Then look at the order from TRI.
+ while (Pos != End) {
+ unsigned Reg = *Pos++;
+ if (Reg != Hint)
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// rewind - Start over from the beginning.
+ void rewind() { Pos = 0; }
+
+ /// isHint - Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+};
+
+} // end namespace llvm
+
+#endif
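
The next()/rewind() contract above is designed for hint-first probing by a register allocator: the hint comes out first, then the remaining order with the hint skipped, then 0. A small usage sketch with a toy order and a hypothetical tryAssign() predicate (neither is part of this header):

#include <cstdio>

// Stand-in mirroring the next()/rewind() contract described above.
class ToyOrder {
  const unsigned *Begin, *End, *Pos;
  unsigned Hint;
public:
  ToyOrder(const unsigned *B, const unsigned *E, unsigned H)
    : Begin(B), End(E), Pos(0), Hint(H) {}
  unsigned next() {
    if (!Pos) { Pos = Begin; if (Hint) return Hint; }  // hint first
    while (Pos != End) { unsigned R = *Pos++; if (R != Hint) return R; }
    return 0;                                          // order exhausted
  }
  void rewind() { Pos = 0; }
};

// Illustrative only: pretend register 12 is the only free one.
static bool tryAssign(unsigned PhysReg) { return PhysReg == 12; }

int main() {
  static const unsigned RawOrder[] = { 10, 11, 12, 13 };
  ToyOrder Order(RawOrder, RawOrder + 4, /*Hint=*/11);

  // Typical caller loop: take registers until one sticks or we hit 0.
  for (unsigned Reg = Order.next(); Reg != 0; Reg = Order.next())
    if (tryAssign(Reg)) {
      std::printf("assigned physreg %u\n", Reg);  // prints 12
      break;
    }
  return 0;
}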
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..125e64196f15
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,303 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (NoNaNsFPMath)
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+ const TargetLowering &TLI) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !I->isSafeToSpeculativelyExecute())
+ for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+ --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !BBI->isSafeToSpeculativelyExecute())
+ return false;
+ }
+
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ const Function *F = ExitBB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Otherwise, make sure the unmodified return value of I is the return value.
+ for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+ U = dyn_cast<Instruction>(U->getOperand(0))) {
+ if (!U)
+ return false;
+ if (!U->hasOneUse())
+ return false;
+ if (U == I)
+ break;
+ // Check for a truly no-op truncate.
+ if (isa<TruncInst>(U) &&
+ TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+ continue;
+ // Check for a truly no-op bitcast.
+ if (isa<BitCastInst>(U) &&
+ (U->getOperand(0)->getType() == U->getType() ||
+ (U->getOperand(0)->getType()->isPointerTy() &&
+ U->getType()->isPointerTy())))
+ continue;
+ // Otherwise it's not a true no-op.
+ return false;
+ }
+
+ return true;
+}
+
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
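
ComputeLinearIndex above flattens an aggregate into scalar slots and returns the slot at which the member selected by an insertvalue/extractvalue index path begins. A hedged sketch of the same recursion over a simplified type model; ToyType and linearIndex are illustrative stand-ins, not LLVM types:

#include <cstdio>
#include <vector>

// Simplified stand-in for an aggregate type: a leaf, or a list of fields.
struct ToyType {
  std::vector<const ToyType*> Fields;   // empty => scalar leaf
};

// Same shape as ComputeLinearIndex: walk fields, counting leaves, and
// descend when the next index selects the current field.
static unsigned linearIndex(const ToyType *Ty,
                            const unsigned *Idx, const unsigned *IdxEnd,
                            unsigned Cur = 0) {
  if (Idx && Idx == IdxEnd)
    return Cur;                          // all indices consumed: start found
  if (Ty->Fields.empty())
    return Cur + 1;                      // scalar leaf occupies one slot
  for (unsigned i = 0, e = Ty->Fields.size(); i != e; ++i) {
    if (Idx && *Idx == i)
      return linearIndex(Ty->Fields[i], Idx + 1, IdxEnd, Cur);
    Cur = linearIndex(Ty->Fields[i], 0, 0, Cur);
  }
  return Cur;
}

int main() {
  // { i32, { float, double }, i64 } -- leaves are i32:0 float:1 double:2 i64:3
  ToyType I32, F32, F64, I64, Inner, Outer;
  Inner.Fields.push_back(&F32);  Inner.Fields.push_back(&F64);
  Outer.Fields.push_back(&I32);  Outer.Fields.push_back(&Inner);
  Outer.Fields.push_back(&I64);

  unsigned Path[] = { 1, 1 };            // outer field 1, then inner field 1
  std::printf("linear index = %u\n",     // prints 2 (the double)
              linearIndex(&Outer, Path, Path + 2));
  return 0;
}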
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 000000000000..df47f984d578
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,71 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
+ virtual ~AntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+ /// BreakAntiDependencies - Identify anti-dependencies within a
+ /// basic-block region and break them by renaming registers. Return
+ /// the number of anti-dependencies broken.
+ ///
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ virtual void Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) =0;
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() =0;
+
+ /// UpdateDbgValue - Update a DBG_VALUE if the dependency breaker is
+ /// updating other machine instructions to use NewReg.
+ void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+ assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+ MI->getOperand(0).setReg(NewReg);
+ }
+};
+
+}
+
+#endif
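
DbgValueVector pairs each DBG_VALUE instruction with the instruction that defines the value it describes, which is what lets UpdateDbgValue retarget debug information when that defining instruction is renamed. A sketch of that bookkeeping with placeholder types; ToyInstr and updateDbgValue are illustrative only:

#include <cstdio>
#include <utility>
#include <vector>

// Placeholder for a machine instruction whose operand 0 is a register.
struct ToyInstr { unsigned Reg; bool IsDbgValue; };

typedef std::vector<std::pair<ToyInstr*, ToyInstr*> > ToyDbgValueVector;

// Mirror of UpdateDbgValue: retarget a DBG_VALUE that still names OldReg.
static void updateDbgValue(ToyInstr *DV, unsigned OldReg, unsigned NewReg) {
  if (DV->IsDbgValue && DV->Reg == OldReg)
    DV->Reg = NewReg;
}

int main() {
  ToyInstr Def = { 5, false };            // defines register 5
  ToyInstr Dbg = { 5, true };             // DBG_VALUE describing that def
  ToyDbgValueVector DbgValues;
  DbgValues.push_back(std::make_pair(&Dbg, &Def));

  // A breaker renames Def's register 5 -> 7, then fixes paired DBG_VALUEs.
  Def.Reg = 7;
  for (ToyDbgValueVector::iterator I = DbgValues.begin(), E = DbgValues.end();
       I != E; ++I)
    if (I->second == &Def)
      updateDbgValue(I->first, 5, 7);

  std::printf("DBG_VALUE now refers to reg %u\n", Dbg.Reg);  // prints 7
  return 0;
}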
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 000000000000..5861fa4817f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,87 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+ {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+ Asm->OutStreamer.EmitFnStart();
+ if (Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitCantUnwind();
+ else {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
+
+ Asm->OutStreamer.EmitFnEnd();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..7f314eed3ae6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1996 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (P == 0)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round up to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
+
+
+
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : MachineFunctionPass(ID),
+ TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer.getContext()),
+ OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = 0; DE = 0; MMI = 0; LI = 0;
+ GCMetadataPrinters = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters != 0) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+ delete I->second;
+ delete &GCMap;
+ GCMetadataPrinters = 0;
+ }
+
+ delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+
+/// getTargetData - Return information about data layout.
+const TargetData &AsmPrinter::getTargetData() const {
+ return *TM.getTargetData();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(*this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer.AddComment("Start of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ OutStreamer.AddComment("End of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ DD = new DwarfDebug(this, &M);
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ return false;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ return false;
+ case ExceptionHandling::ARM:
+ DE = new ARMException(this);
+ return false;
+ case ExceptionHandling::Win64:
+ DE = new Win64Exception(this);
+ return false;
+ }
+
+ llvm_unreachable("Unknown exception type.");
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->getLinkOnceDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const TargetData *TD = TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ if (MAI->hasLCOMMDirective()) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
+ return;
+ }
+
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize, 0);
+ OutStreamer.EmitIntValue(0, PtrSize, 0);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer.AddComment("Address taken block that was later removed");
+ OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ }
+
+ // Add some workaround for linkonce linkage on Cygwin/MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+ // FIXME: What is this?
+ MCSymbol *FakeStub =
+ OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+ CurrentFnSym->getName());
+ OutStreamer.EmitLabel(FakeStub);
+ }
+
+ // Emit pre-function debug and/or EH information.
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
+ }
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
+ }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined())
+ return OutStreamer.EmitLabel(CurrentFnSym);
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+}
+
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// EmitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+ AP.TM.getRegisterInfo()->getName(RegNo));
+ AP.OutStreamer.AddBlankLine();
+}
+
+static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str = "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ Str += ' ';
+ Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer.AddComment(Str);
+ AP.OutStreamer.AddBlankLine();
+}
+
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+ // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ MF->getFunction()->needsUnwindTableEntry();
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ MachineModuleInfo &MMI = MF->getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool FoundOne = false;
+ (void)FoundOne;
+ for (std::vector<MachineMove>::iterator I = Moves.begin(),
+ E = Moves.end(); I != E; ++I) {
+ if (I->getLabel() == Label) {
+ EmitCFIFrameMove(*I);
+ FoundOne = true;
+ }
+ }
+ assert(FoundOne);
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ LastMI = II;
+
+ // Print the assembly for the instruction.
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginInstruction(II);
+ }
+
+ if (isVerbose())
+ EmitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::PROLOG_LABEL:
+ emitPrologLabel(*II);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(II);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!EmitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) EmitImplicitDef(II, *this);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) EmitKill(II, *this);
+ break;
+ default:
+ if (!TM.hasMCUseLoc())
+ MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
+ EmitInstruction(II);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endInstruction(II);
+ }
+ }
+ }
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function, so we can get the size as
+ // difference between the function label and the temp label.
+ MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(FnEndLabel);
+
+ const MCExpr *SizeExp =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ OutContext);
+ OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ }
+
+ // Emit post-function debug information.
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
+ }
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
+ }
+ MMI->EndFunction();
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
+
+/// EmitDwarfRegOp - Emit a DWARF register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+ for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ *SR && Reg < 0; ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ // FIXME: Get the bit range this register uses of the superregister
+ // so that we can produce a DW_OP_bit_piece
+ }
+
+ // FIXME: Handle cases like a super register being encoded as
+ // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
+
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of a DWARF expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more mature.
+
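+ // Registers 0-31 have dedicated one-byte DWARF opcodes (DW_OP_reg0+Reg, or
+ // DW_OP_breg0+Reg when there is an offset); larger register numbers need the
+ // DW_OP_regx/DW_OP_bregx forms, which take the register number as a ULEB128
+ // operand.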
+ if (int Offset = MLoc.getOffset()) {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+ EmitInt8(dwarf::DW_OP_breg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_bregx");
+ EmitInt8(dwarf::DW_OP_bregx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ EmitSLEB128(Offset);
+ } else {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_regx");
+ EmitInt8(dwarf::DW_OP_regx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ }
+
+ // FIXME: Produce a DW_OP_bit_piece if we used a superregister
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ // Finalize debug and EH information.
+ if (DE) {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndModule();
+ }
+ delete DE; DE = 0;
+ }
+ if (DD) {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endModule();
+ }
+ delete DD; DD = 0;
+ }
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy; it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = Mang->getSymbol(I);
+
+ const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ MCSymbol *Target = Mang->getSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
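+ // Let each used GC strategy's metadata printer finish up, iterating the
+ // registered strategies in reverse order.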
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(*this);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ MMI = 0;
+
+ OutStreamer.Finish();
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = Mang->getSymbol(MF.getFunction());
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+ // SectionCPs - Keep track of the alignment and constant pool entries per Section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries that go
+ // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16();break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+ // The number of sections is small, so just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
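+ // NewOffset rounds Offset up to the next multiple of this entry's alignment
+ // (the alignment is a power of two, so masking with ~AlignMask works).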
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+
+ const Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ // We should also do this if the section name is NULL or the function is
+ // declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name; any randomly named
+ // and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+ // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // Only emit the list on targets with .no_dead_strip.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - For targets that support .no_dead_strip, mark each
+/// global in the specified llvm.used list for which shouldEmitUsedDirectiveFor
+/// returns true as no-dead-strip, so the linker does not discard it.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (!isa<ConstantArray>(List)) return;
+ const ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+}
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective())
+ OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+ else {
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+ }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
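+ // EmitCodeAlignment lets targets pad text sections with nop encodings rather
+ // than zero bytes; data sections are simply padded with zeros.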
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ return MCConstantExpr::Create(0, Ctx);
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
+ IdxVec.size());
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
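+ // e.g. on a 32-bit target the shift pair below sign-extends bit 31 of the
+ // offset through the upper 32 bits of the 64-bit value.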
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
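+ // e.g. for a 32-bit pointer, InBits is 32 and the mask is 0xFFFFFFFF.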
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (AddrSpace != 0 || !CA->isString()) {
+ // Not a string. Print the values in successive locations
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ return;
+ }
+
+ // Otherwise, it can be emitted as .ascii.
+ SmallVector<char, 128> TmpVec;
+ TmpVec.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+ AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
+
+static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstantImpl(Field, AddrSpace, AP);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isFloatTy()) {
+ if (AP.isVerbose()) {
+ float Val = CFP->getValueAPF().convertToFloat();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ }
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isX86_FP80Ty()) {
+ // All long double variants are printed as hex.
+ // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.isVerbose()) {
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
+ << DoubleVal.convertToDouble() << '\n';
+ }
+
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const TargetData &TD = *AP.TM.getTargetData();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+ return;
+ }
+
+ assert(CFP->getType()->isPPC_FP128Ty() &&
+ "Floating point constant type not handled");
+ // All long double variants are printed as hex
+ // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+ uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ return;
+ }
+ }
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // through the streamer with EmitValue.
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+ AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+ AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ if (Size)
+ EmitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+ Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out which block is the
+ // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Log2_32(Align));
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (isVerbose())
+ OutStreamer.AddComment("Block address taken");
+
+ std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+ for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+ OutStreamer.EmitLabel(Syms[i]);
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+ Twine(MBB->getNumber()) + ":");
+ }
+ } else {
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ OutStreamer.EmitLabel(MBB->getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+ // If this block appears as an operand of one of the branches (or the
+ // branch uses a jump table), this is not a fall-through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(S);
+ if (GCPI != GCMap.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMap.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 000000000000..dd5b0e261490
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,233 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DWARF emission parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ if (MAI->hasLEB128()) {
+ OutStreamer.EmitSLEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .sleb128, emit as .bytes.
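+ // Each byte holds 7 bits of the value; the 0x80 continuation bit is set on
+ // all but the last byte. e.g. -2 encodes as the single byte 0x7e, since the
+ // sign/extension bit (0x40) already matches the value's sign.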
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ // FIXME: Should we add a PadTo option to the streamer?
+ if (MAI->hasLEB128() && PadTo == 0) {
+ OutStreamer.EmitULEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
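+ // When PadTo is nonzero, the value bytes all keep their continuation bit and
+ // PadTo extra bytes follow (0x80 padding bytes and a final 0x00), which still
+ // decodes to the same value. e.g. Value=1, PadTo=2 emits 0x81 0x80 0x00.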
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value || PadTo != 0) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (Value);
+
+ if (PadTo) {
+ if (PadTo > 1)
+ OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+ OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+ }
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+ if (isVerbose()) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc != 0)
+ OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
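+ // Only the low bits select the data format; signed and unsigned variants of
+ // the same width have the same size, and the pcrel/indirect application bits
+ // do not affect it.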
+ switch (Encoding & 0x07) {
+ default: assert(0 && "Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+ case dwarf::DW_EH_PE_udata2: return 2;
+ case dwarf::DW_EH_PE_udata4: return 4;
+ case dwarf::DW_EH_PE_udata8: return 8;
+ }
+}
+
+void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
+ OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section. This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+ const MCSymbol *SectionLabel) const {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
+ // FIXME: MCize.
+ OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ return;
+ }
+
+ // Get the section that we're referring to, based on SectionLabel.
+ const MCSection &Section = SectionLabel->getSection();
+
+ // If Label has already been emitted, verify that it is in the same section
+ // as the section label, for sanity.
+ assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+ "Section offset using wrong section base for label");
+
+ // If the section in question will end up with an address of 0 anyway, we can
+ // just emit an absolute reference to save a relocation.
+ if (Section.isBaseAddressKnownZero()) {
+ OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitCFIFrameMove - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
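+ // A Dst of VirtualFP redefines the CFA (new offset, or register + offset); a
+ // Src of VirtualFP changes only the register the CFA is computed from; any
+ // other move records that register Src is saved at Dst's offset from the CFA.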
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ // Reg + Offset
+ OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
+ Src.getOffset());
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 000000000000..5ac455e1a1a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,419 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer is actually a .s file, just emit the blob textually.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText(Str);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ MemoryBuffer *Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer; it takes ownership of it.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+
+ // FIXME: It would be nice if we could avoid creating a new instance of
+ // MCSubtargetInfo here, given that TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state, e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setTargetParser(*TAP.get());
+
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
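+
+// A minimal sketch of how a frontend can hook the diagnostic path used above.
+// The setter is assumed to be the counterpart of the
+// getInlineAsmDiagnosticHandler() call made in this function; the handler and
+// context names are hypothetical:
+//
+//   static void MyInlineAsmDiagHandler(const SMDiagnostic &D, void *Ctx,
+//                                      unsigned LocCookie) {
+//     // Map LocCookie back to a frontend source location and report D there.
+//   }
+//   ...
+//   Context.setInlineAsmDiagnosticHandler(MyInlineAsmDiagHandler, &MyCookieMap);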
+
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful for seeing where empty asm blocks wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ OS << '\t';
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Emit the newline to the output.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ // FIXME: What if the operand isn't an MBB, report error?
+ OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+ EmitInlineAsm(OS.str(), LocMD);
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+}
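+
+// Worked example of the scan above, using a hypothetical x86 asm string (on
+// x86, assembler dialect 0 is conventionally AT&T and 1 is Intel):
+//
+//   "$(movl $1, $0$|mov $0, $1$)"
+//
+// With AsmPrinterVariant == 0 only the first $(...$|...$) alternative
+// survives, so "movl <operand 1>, <operand 0>" is passed on to
+// EmitInlineAsm(StringRef); with AsmPrinterVariant == 1 only
+// "mov <operand 0>, <operand 1>" does. "$$" emits a literal '$', and
+// "${0:u}" forwards the 'u' modifier to PrintAsmOperand for operand 0.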
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ OS << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new instruction or a new function, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
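+
+// For example, an asm string such as
+//
+//   "jmp ${:private}tmp${:uid} ${:comment} resume loop"
+//
+// expands "${:private}" to the target's private-global prefix (e.g. ".L" or
+// "L"), "${:uid}" to a number that is unique per inline-asm instruction and
+// per function, and "${:comment}" to the assembler's comment string (e.g.
+// "#"); the concrete characters are taken from MCAsmInfo and are
+// target-dependent.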
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
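+
+// Targets override the two hooks above to do the real operand printing. A
+// minimal sketch for a hypothetical target (the class name and the register
+// name helper are illustrative only):
+//
+//   bool MyTargetAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+//                                            unsigned OpNo,
+//                                            unsigned AsmVariant,
+//                                            const char *ExtraCode,
+//                                            raw_ostream &O) {
+//     if (ExtraCode && ExtraCode[0])
+//       return true;                     // Unknown modifier -> error.
+//     const MachineOperand &MO = MI->getOperand(OpNo);
+//     if (MO.isReg())       O << '%' << MyTargetRegisterName(MO.getReg());
+//     else if (MO.isImm())  O << MO.getImm();
+//     else                  return true; // Unsupported operand kind.
+//     return false;                      // Printed successfully.
+//   }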
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..21396ca37f06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,368 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
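+
+// For example, an abbreviation for DW_TAG_variable with no children and a
+// single (DW_AT_name, DW_FORM_string) pair comes out as the ULEB128 byte
+// sequence
+//
+//   0x34 0x00 0x03 0x08 0x00 0x00
+//
+// i.e. tag, children flag, one attribute/form pair, then the 0,0 terminator
+// emitted above.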
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+ DIEInteger *DI = new (A) DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+ return DI;
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+ } else {
+ O << "Size: " << Size << "\n";
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
+ default: llvm_unreachable("DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << format(" 0x%llx", (unsigned long long)Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
+ // Emit nul terminator.
+ AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
+}
+
+#ifndef NDEBUG
+void DIEString::print(raw_ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIELabel::print(raw_ostream &O) {
+ O << "Lbl: " << Label->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ switch (Form) {
+ default: assert(0 && "Improper form for block"); break;
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 000000000000..7d61f1edff4a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,433 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MCSymbol;
+ class raw_ostream;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data; describes one attribute of a
+ /// Dwarf abbreviation.
+ class DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+ public:
+ DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 8> Data;
+
+ public:
+ DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+ /// describes its organization.
+ class DIEValue;
+
+ class DIE {
+ protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ DIE *Parent;
+
+ /// Attributes values.
+ ///
+ SmallVector<DIEValue*, 32> Values;
+
+ // Private data for print()
+ mutable unsigned IndentCount;
+ public:
+ explicit DIE(unsigned Tag)
+ : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+ Size(0), Parent (0), IndentCount(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+ DIE *getParent() const { return Parent; }
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// addValue - Add a value and attributes to a DIE.
+ ///
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned getSiblingOffset() const { return Offset + Size; }
+
+ /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+ /// The caller is responsible for deleting the return value at or after the
+ /// same time it destroys this DIE.
+ ///
+ DIEValue *addSiblingOffset(BumpPtrAllocator &A);
+
+ /// addChild - Add a child to the DIE.
+ ///
+ void addChild(DIE *Child) {
+ if (Child->getParent()) {
+ assert (Child->getParent() == this && "Unexpected DIE Parent!");
+ return;
+ }
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ Child->Parent = this;
+ }
+
+#ifndef NDEBUG
+ void print(raw_ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class DIEValue {
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isSectionOffset,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
+ if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
+ if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
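+
+ // Illustrative picks, following the checks above: BestForm(false, 0x2A)
+ // yields DW_FORM_data1, BestForm(false, 300) yields DW_FORM_data2, and
+ // BestForm(false, 0x100000000ULL) falls through to DW_FORM_data8.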
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ uint64_t getValue() const { return Integer; }
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEString - A string value DIE. This DIE keeps only a string reference.
+ ///
+ class DIEString : public DIEValue {
+ const StringRef Str;
+ public:
+ explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
+ return Str.size() + sizeof(char); // sizeof('\0');
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIELabel - A label expression DIE.
+ //
+ class DIELabel : public DIEValue {
+ const MCSymbol *Label;
+ public:
+ explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// getValue - Get MCSymbol.
+ ///
+ const MCSymbol *getValue() const { return Label; }
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIELabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class DIEDelta : public DIEValue {
+ const MCSymbol *LabelHi;
+ const MCSymbol *LabelLo;
+ public:
+ DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (i.e. types).
+ class DIEEntry : public DIEValue {
+ DIE *const Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - calculate the size of the block.
+ ///
+ unsigned ComputeSize(AsmPrinter *AP);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(raw_ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..91b7d08c6ae2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,152 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer.EmitCFISections(false, true);
+
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ return;
+
+ // Emit references to all used personality functions
+ bool AtLeastOne = false;
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ if (!Personalities[i])
+ continue;
+ MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ AtLeastOne = true;
+ }
+
+ if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+ // This is a temporary hack to keep sections in the same order they
+ // were before. This lets us produce bit identical outputs while
+ // transitioning to CFI.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+ Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ // Provide LSDA information.
+ if (!shouldEmitLSDA)
+ return;
+
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+}
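+
+// On an assembly streamer the calls above typically come out as the
+// corresponding CFI directives; the encoding values and symbol names below
+// are illustrative and depend on the target's personality/LSDA encodings:
+//
+//   .cfi_startproc
+//   .cfi_personality 155, ___gxx_personality_v0
+//   Leh_func_begin0:
+//   .cfi_lsda 16, Lexception0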
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality)
+ EmitExceptionTable();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 000000000000..1fe035efde3e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1054 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
+ : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+ return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps a string reference.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
+ StringRef String) {
+ DIEValue *Value = new (DIEValueAllocator) DIEString(String);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+ DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+ // Verify variable.
+ if (!V.Verify())
+ return;
+
+ unsigned Line = V.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+ // Verify global variable.
+ if (!G.Verify())
+ return;
+
+ unsigned Line = G.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(),
+ G.getContext().getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+ // Verify subprogram.
+ if (!SP.Verify())
+ return;
+ // If the line number is 0, don't add it.
+ if (SP.getLineNumber() == 0)
+ return;
+
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0 || !Ty.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+ // Verify namespace.
+ if (!NS.Verify())
+ return;
+
+ unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
+ StringRef FN = NS.getFilename();
+
+ unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on the provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (Reg == TRI->getFrameRegister(*Asm->MF))
+ // If the variable's offset is based on the frame register, use fbreg.
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
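+
+// For illustration (DWARF register numbers are target-specific): a location
+// at offset -8 from x86-64 RBP (DWARF reg 6), when RBP is the frame register,
+// becomes "DW_OP_fbreg -8"; the same offset from RAX (DWARF reg 0) becomes
+// "DW_OP_breg0 -8"; and a register numbered 32 or higher is encoded as
+// "DW_OP_bregx <reg>" followed by the offset.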
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable,
+/// given the extra address information encoded in the DIVariable. Add the
+/// DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ for (;i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
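+
+// For illustration, a DbgVariable whose complex address elements are
+// { OpPlus, 16, OpDeref } and whose location is in a register produces
+// "DW_OP_breg<reg> 16" (or "DW_OP_fbreg 16" if that register is the frame
+// register) followed by "DW_OP_deref": the leading OpPlus/offset pair is
+// folded into the base register expression above, and the remaining OpDeref
+// is appended as DW_OP_deref.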
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
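+
+/* Putting concrete (hypothetical) numbers on the recipe above: if the byref
+ variable's storage is at offset -40 from DWARF register 6, the __forwarding
+ field sits at byte offset 8, and VarName sits at byte offset 24, then
+ starting from a pointer to the struct the emitted location expression is
+
+ DW_OP_breg6 -40, DW_OP_deref, DW_OP_plus_uconst 8, DW_OP_deref,
+ DW_OP_plus_uconst 24
+
+ and the leading DW_OP_deref is dropped when we start with the struct
+ itself. */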
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read the large comment just above.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIType Ty = DV->getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV->getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy);
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset =
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+ unsigned varFieldOffset =
+ DIDerivedType(varField).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addConstantValue - Add a constant value entry to the variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert (MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned form = dwarf::DW_FORM_udata;
+ switch (Ty.getSizeInBits()) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default: break;
+ }
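+ // Widths other than 8/16/32/64 bits keep the ULEB128-encoded DW_FORM_udata
+ // default chosen above.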
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char))
+ addSInt(Block, 0, form, MO.getImm());
+ else
+ addUInt(Block, 0, form, MO.getImm());
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
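+ // Iterating forwards on little-endian targets and backwards on big-endian
+ // ones emits the bytes in target memory order.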
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+ bool Unsigned) {
+ unsigned CIBitWidth = CI->getBitWidth();
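+ // Constants up to 64 bits wide are attached directly using a fixed-size or
+ // LEB128 form; wider values are emitted below as a block of raw bytes.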
+ if (CIBitWidth <= 64) {
+ unsigned form = 0;
+ switch (CIBitWidth) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default:
+ form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ }
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const APInt Val = CI->getValue();
+ const char *Ptr = (const char*)Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & Ptr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addTemplateParams - Add template parameters in buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+ // Add template parameters.
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (Context.isType()) {
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isNameSpace()) {
+ DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isSubprogram()) {
+ DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context));
+ ContextDIE->addChild(Die);
+ } else if (DIE *ContextDIE = getDIE(Context))
+ ContextDIE->addChild(Die);
+ else
+ addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) {
+ DIE *TyDIE = getDIE(Ty);
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
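+ // Insert into the map before constructing the type so that recursive
+ // references from the type's own members resolve to this DIE.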
+ insertDIE(Ty, TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty) {
+ if (!Ty.Verify())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = getDIEEntry(Ty);
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ // Set up proxy.
+ Entry = createDIEEntry(Buffer);
+ insertDIEEntry(Ty, Entry);
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+
+ // If this is a complete composite type then include it in the
+ // list of global types.
+ addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+ DIDescriptor Context = Ty.getContext();
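+ // Only named, fully-defined composite types at compile unit, file or
+ // namespace scope are recorded for the pubtypes table.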
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty))
+ GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
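+ // Element 0 of a subroutine type is the return type; the remaining elements
+ // are the formal parameter types.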
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
+ continue;
+ addGlobalType(ATy);
+ }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIDescriptor Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+ Buffer.addChild(ElemDie);
+ }
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy));
+
+ bool isPrototyped = true;
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Ty = Elements.getElement(i);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ Buffer.addChild(Arg);
+ }
+ }
+ // Add prototype flag.
+ if (isPrototyped)
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward-declared struct type may not have its elements available.
+ unsigned N = Elements.getNumElements();
+ if (N == 0)
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
+ ElemDie = DD->createSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ }
+ else if (Element.isVariable()) {
+ DIVariable DV(Element);
+ ElemDie = new DIE(dwarf::DW_TAG_variable);
+ addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ DV.getName());
+ addType(ElemDie, DV.getType());
+ addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(ElemDie, DV);
+ } else if (Element.isDerivedType())
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ else
+ continue;
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (DIDescriptor(ContainingType).isCompositeType())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
+
+ if (CTy.isObjcClassComplete())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
+ dwarf::DW_FORM_flag, 1);
+
+ if (Tag == dwarf::DW_TAG_class_type)
+ addTemplateParams(Buffer, CTy.getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ {
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+ // The L value defines the lower bound, which is typically zero for C/C++.
+ // The H value is the upper bound. Values are 64 bit; H - L + 1 is the size
+ // of the array. If L > H then do not emit the DW_AT_lower_bound and
+ // DW_AT_upper_bound attributes. If L and H are both zero then the array has
+ // one element; in that case do not emit the lower bound.
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
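+ // Cache it on the unit so every array type shares this single index DIE.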
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
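+ // Offset is now the bit offset of the field within its storage unit.
+ // DW_AT_bit_offset counts from the most significant bit of that unit, so
+ // flip it on little-endian targets.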
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable:
+ //   BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..213c7fc630d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,280 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to the specified debug
+ /// information entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+
+ /// addTemplateParams - Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+ /// addRegisterOp - Add register operand.
+ void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+ /// addRegisterOffset - Add register offset.
+ void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a
+ /// DbgVariable based on provided MachineLocation.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(DIType Ty);
+
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+private:
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
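+ // DIEIntegerOne - A pre-allocated DIEInteger with value one, shared by
+ // attributes whose integer value is 1.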
+ DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 000000000000..125e1e86b12f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2755 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
+ cl::desc("Print DbgScope information for each machine instruction"));
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+DIType DbgVariable::getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgRange - This is used to track range of instructions with identical
+/// debug info scope.
+///
+typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange;
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgScope {
+ DbgScope *Parent; // Parent to this scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ // Location at which this scope is inlined.
+ AssertingVH<const MDNode> InlinedAtLocation;
+ bool AbstractScope; // Abstract Scope
+ const MachineInstr *LastInsn; // Last instruction of this scope.
+ const MachineInstr *FirstInsn; // First instruction of this scope.
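+ // Depth-first search entry/exit numbers, used by dominates() below.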
+ unsigned DFSIn, DFSOut;
+ // Scopes defined in scope. Contents not owned.
+ SmallVector<DbgScope *, 4> Scopes;
+ // Variables declared in scope. Contents owned.
+ SmallVector<DbgVariable *, 8> Variables;
+ SmallVector<DbgRange, 4> Ranges;
+ // Private state for dump()
+ mutable unsigned IndentLevel;
+public:
+ DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ LastInsn(0), FirstInsn(0),
+ DFSIn(0), DFSOut(0), IndentLevel(0) {}
+ virtual ~DbgScope();
+
+ // Accessors.
+ DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
+ DIDescriptor getDesc() const { return Desc; }
+ const MDNode *getInlinedAt() const { return InlinedAtLocation; }
+ const MDNode *getScopeNode() const { return Desc; }
+ const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+ const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
+ const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
+
+ /// openInsnRange - This scope covers an instruction range starting from MI.
+ void openInsnRange(const MachineInstr *MI) {
+ if (!FirstInsn)
+ FirstInsn = MI;
+
+ if (Parent)
+ Parent->openInsnRange(MI);
+ }
+
+ /// extendInsnRange - Extend the current instruction range covered by
+ /// this scope.
+ void extendInsnRange(const MachineInstr *MI) {
+ assert (FirstInsn && "MI Range is not open!");
+ LastInsn = MI;
+ if (Parent)
+ Parent->extendInsnRange(MI);
+ }
+
+ /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
+ /// until now. This is used when a new scope is encountered while walking
+ /// machine instructions.
+ void closeInsnRange(DbgScope *NewScope = NULL) {
+ assert (LastInsn && "Last insn missing!");
+ Ranges.push_back(DbgRange(FirstInsn, LastInsn));
+ FirstInsn = NULL;
+ LastInsn = NULL;
+ // If Parent dominates NewScope then do not close Parent's instruction
+ // range.
+ if (Parent && (!NewScope || !Parent->dominates(NewScope)))
+ Parent->closeInsnRange(NewScope);
+ }
+
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
+
+ // Depth First Search support to walk and manipulate the DbgScope hierarchy.
+ unsigned getDFSOut() const { return DFSOut; }
+ void setDFSOut(unsigned O) { DFSOut = O; }
+ unsigned getDFSIn() const { return DFSIn; }
+ void setDFSIn(unsigned I) { DFSIn = I; }
+ bool dominates(const DbgScope *S) {
+ if (S == this)
+ return true;
+ if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
+ return true;
+ return false;
+ }
+
+ /// addScope - Add a scope to the scope.
+ ///
+ void addScope(DbgScope *S) { Scopes.push_back(S); }
+
+ /// addVariable - Add a variable to the scope.
+ ///
+ void addVariable(DbgVariable *V) { Variables.push_back(V); }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // end llvm namespace
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+ raw_ostream &err = dbgs();
+ err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ N->dump();
+ if (AbstractScope)
+ err << "Abstract Scope\n";
+
+ IndentLevel += 2;
+ if (!Scopes.empty())
+ err << "Children ...\n";
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+ if (Scopes[i] != this)
+ Scopes[i]->dump();
+
+ IndentLevel -= 2;
+}
+#endif
+
+DbgScope::~DbgScope() {
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+ delete Variables[j];
+}
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize),
+ CurrentFnDbgScope(0), PrevLabel(NULL) {
+ NextStringPoolNumber = 0;
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+ DwarfStrSectionSym = TextSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule(M);
+ }
+}
+
+DwarfDebug::~DwarfDebug() {
+}
+
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+ if (Entry.first) return Entry.first;
+
+ Entry.second = NextStringPoolNumber++;
+ return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+/// getRealLinkageName - If the linkage name starts with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix
+/// before the symbol name, return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
+ CompileUnit *SPCU = getCompileUnit(SP);
+ DIE *SPDie = SPCU->getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ SPCU->insertDIE(SP, SPDie);
+
+ // Add to context owner.
+ SPCU->addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ SPCU->addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
+ SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+ // If this DIE is going to refer declaration info using AT_specification
+ // then there is no need to add other attributes.
+ if (SP.getFunctionDeclaration().isSubprogram())
+ return SPDie;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ SP.getName());
+
+ SPCU->addSourceLine(SPDie, SP);
+
+ if (SP.isPrototyped())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ SPCU->addType(SPDie, SPTy);
+ else
+ SPCU->addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
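+ // DW_AT_vtable_elem_location holds the virtual slot index as the
+ // expression DW_OP_constu <index>.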
+ DIEBlock *Block = SPCU->getDIEBlock();
+ SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(DIType(Args.getElement(i)));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ if (!SP.isLocalToUnit())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ if (SP.isOptimized())
+ SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
+ DbgScope *Parent = NULL;
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
+
+ if (Parent)
+ Parent->addScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+static bool isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext());
+ return false;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+ CompileUnit *SPCU = getCompileUnit(SPNode);
+ DIE *SPDie = SPCU->getDIE(SPNode);
+
+ assert(SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (SPDecl.isSubprogram())
+ // Refer function declaration directly.
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ createSubprogramDIE(SPDecl));
+ else {
+ // There is no need to generate a specification DIE for a function
+ // defined at compile unit level. If a function is defined inside another
+ // function then gdb prefers the definition at the top level and does not
+ // expect a specification DIE in the parent function. So avoid creating a
+ // specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(DIType(Args.getElement(i)));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
+ // Pick up abstract subprogram DIE.
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsSPDIE);
+ SPCU->addDie(SPDie);
+ }
+
+ SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+ SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct a new DW_TAG_lexical_block DIE
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode());
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
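+ // Terminate this scope's entry in .debug_ranges with a pair of null
+ // symbols, which are emitted as the 0,0 end-of-list marker.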
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ const MCSymbol *End = getLabelAfterInsn(RI->second);
+
+ if (End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+
+ return ScopeDIE;
+}
+
+/// constructInlinedScopeDIE - This scope represents inlined body of
+/// a function. Construct DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
+
+ const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ assert (Ranges.empty() == false
+ && "DbgScope does not have instruction markers!");
+
+ // FIXME: The .debug_inlined section specification does not clearly state
+ // how to emit an inlined scope that is split into multiple instruction
+ // ranges. For now, use the first instruction range and emit the
+ // low_pc/high_pc pair and the corresponding .debug_inlined entry for it.
+ SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+ if (StartLabel == 0 || EndLabel == 0) {
+ assert(0 && "Unexpected Start and End labels for an inlined scope!");
+ return 0;
+ }
+ assert(StartLabel->isDefined() &&
+ "Invalid starting label for an inlined scope!");
+ assert(EndLabel->isDefined() &&
+ "Invalid end label for an inlined scope!");
+
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ CompileUnit *TheCU = getCompileUnit(InlinedSP);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ return NULL;
+ }
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+ // Track the start label for this inlined function.
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
+ ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP);
+ } else
+ I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
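+ // Record the call site: DW_AT_call_file/DW_AT_call_line describe where this
+ // inlined copy was called from, taken from the scope's inlined-at location.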
+ DILocation DL(Scope->getInlinedAt());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
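+ // Look through derived types (typedefs, qualifiers) to the underlying basic
+ // type before checking its encoding.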
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
+ StringRef Name = DV->getName();
+ if (Name.empty())
+ return NULL;
+
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (DV->getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ CompileUnit *VariableCU = getCompileUnit(DV->getVariable());
+ DIE *AbsDIE = NULL;
+ DenseMap<const DbgVariable *, const DbgVariable *>::iterator
+ V2AVI = VarToAbstractVarMap.find(DV);
+ if (V2AVI != VarToAbstractVarMap.end())
+ AbsDIE = V2AVI->second->getDIE();
+
+ if (AbsDIE)
+ VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ Name);
+ VariableCU->addSourceLine(VariableDie, DV->getVariable());
+
+ // Add variable type.
+ VariableCU->addType(VariableDie, DV->getType());
+ }
+
+ if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
+ VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+ else if (DIVariable(DV->getVariable()).isArtificial())
+ VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+
+ if (Scope->isAbstractScope()) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
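+ // A variable whose location changes over its lifetime refers to its
+ // .debug_loc location list rather than a single location expression.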
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ UseDotDebugLocEntry.insert(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
+ DbgVariableToDbgInstMap.find(DV);
+ if (DVI != DbgVariableToDbgInstMap.end()) {
+ const MachineInstr *DVInsn = DVI->second;
+ bool updated = false;
+ // FIXME : Handle getNumOperands != 3
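+ // In a three-operand DBG_VALUE, operand 0 holds the value's location
+ // (register, immediate or FP immediate) and operand 1 the offset used for
+ // frame-register-relative addresses.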
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ VariableCU->addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ VariableCU->addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ isUnsignedDIType(DV->getType()));
+ } else {
+ VariableCU->addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+ // If VariableDie was not updated then the DBG_VALUE instruction does not
+ // have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // ... otherwise use the frame index, if available.
+ int FI = 0;
+ if (findVariableFrameIndex(DV, &FI)) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ VariableCU->addVariableAddress(DV, VariableDie, Location);
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ SmallVector <DIE *, 8> Children;
+
+ // Collect arguments for current function.
+ if (Scope == CurrentFnDbgScope)
+ for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+ if (DbgVariable *ArgDV = CurrentFnArguments[i])
+ if (DIE *Arg = constructVariableDIE(ArgDV, Scope))
+ Children.push_back(Arg);
+
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+ Children.push_back(Variable);
+ const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(Scopes[j]))
+ Children.push_back(Nested);
+ DIScope DS(Scope->getScopeNode());
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
+ ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+ }
+ else
+ ScopeDIE = updateSubprogramScopeDIE(DS);
+ }
+ else {
+ // There is no need to emit empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ }
+
+ if (!ScopeDIE) return NULL;
+
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
+
+ if (DS.isSubprogram())
+ getCompileUnit(DS)->addPubTypes(DISubprogram(DS));
+
+ return ScopeDIE;
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
+
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If the frontend did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+ // MCStreamer expects the full path name as the filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
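+
+// For example, the first file registered here, say DirName "/tmp" and
+// FileName "foo.c", is stored under its full path and gets source id 1,
+// producing a directive such as:
+//   .file 1 "/tmp/foo.c"
+// A later lookup for the same path simply returns the cached id.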
+
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, Dir);
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer());
+ NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+ // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+ // simplifies debug range entries.
+ NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+ // DW_AT_stmt_list is an offset of the line number information for this
+ // compile unit in the debug_line section.
+ if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("section_line"));
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+ if (!Dir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ if (!FirstCU)
+ FirstCU = NewCU;
+ CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get the CompileUnit for the given debug info node.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+ assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+ DIDescriptor D(N);
+ const MDNode *CUNode = NULL;
+ if (D.isCompileUnit())
+ CUNode = N;
+ else if (D.isSubprogram())
+ CUNode = DISubprogram(N).getCompileUnit();
+ else if (D.isType())
+ CUNode = DIType(N).getCompileUnit();
+ else if (D.isGlobalVariable())
+ CUNode = DIGlobalVariable(N).getCompileUnit();
+ else if (D.isVariable())
+ CUNode = DIVariable(N).getCompileUnit();
+ else if (D.isNameSpace())
+ CUNode = DINameSpace(N).getCompileUnit();
+ else if (D.isFile())
+ CUNode = DIFile(N).getCompileUnit();
+ else
+ return FirstCU;
+
+ DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+ = CUMap.find(CUNode);
+ if (I == CUMap.end())
+ return FirstCU;
+ return I->second;
+}
+
+// Return the constant expression if the value is a GEP used to access a
+// merged global constant, e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global value.
+ if (!isa<GlobalValue>(CE->getOperand(0)))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
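+
+// The offset recovered here feeds the location expression that
+// constructGlobalVariableDIE builds below for a merged global, which comes
+// out roughly as: DW_OP_addr <_MergedGlobals>, DW_OP_constu <offset>,
+// DW_OP_plus.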
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
+ DIGlobalVariable GV(N);
+
+ // If debug information is malformed then ignore it.
+ if (!GV.Verify())
+ return;
+
+ // Check for pre-existence.
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(GV))
+ return;
+
+ DIType GTy = GV.getType();
+ DIE *VariableDIE = new DIE(GV.getTag());
+
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+
+ // Add name.
+ TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ TheCU->addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ TheCU->addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ TheCU->addSourceLine(VariableDIE, GV);
+ // Add to map.
+ TheCU->insertDIE(N, VariableDIE);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ TheCU->addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ TheCU->addDie(VariableSpecDIE);
+ } else {
+ TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+ ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+ TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ return;
+}
+
+/// constructSubprogramDIE - Construct a subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
+ DISubprogram SP(N);
+
+ // Check for pre-existence.
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(N))
+ return;
+
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return;
+
+ DIE *SubprogramDie = createSubprogramDIE(SP);
+
+ // Add to map.
+ TheCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global.
+ TheCU->addGlobal(SP.getName(), SubprogramDie);
+
+ return;
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M) {
+ if (DisableDebugInfoPrinting)
+ return;
+
+ // If the module has named metadata anchors then use them; otherwise scan
+ // the module using the debug info finder to collect debug info.
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+
+ NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv");
+ NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp");
+ if (!GV_Nodes && !SP_Nodes)
+ // If there are no global variables and no functions then
+ // there is no debug info in this module.
+ return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
+ constructCompileUnit(CU_Nodes->getOperand(i));
+
+ if (GV_Nodes)
+ for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i)
+ constructGlobalVariableDIE(GV_Nodes->getOperand(i));
+
+ if (SP_Nodes)
+ for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i)
+ constructSubprogramDIE(SP_Nodes->getOperand(i));
+
+ } else {
+
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
+
+ bool HasDebugInfo = false;
+ // Scan all the compile units to see if any are marked as the main unit.
+ // If not, we do not generate debug info.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ if (DICompileUnit(*I).isMain()) {
+ HasDebugInfo = true;
+ break;
+ }
+ }
+ if (!HasDebugInfo) return;
+
+ // Create all the compile unit DIEs.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
+ constructCompileUnit(*I);
+
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I)
+ constructGlobalVariableDIE(*I);
+
+ // Create DIEs for each subprogram.
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I)
+ constructSubprogramDIE(*I);
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ // Emit initial sections.
+ EmitSectionLabels();
+
+ // Create type DIEs for types referenced from the llvm.dbg.enum and
+ // llvm.dbg.ty named metadata.
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+ }
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+ if (!FirstCU) return;
+ const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
+ if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+ for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+ if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+ DISubprogram SP(AllSPs->getOperand(SI));
+ if (!SP.Verify()) continue;
+
+ // Collect info for variables that were optimized out.
+ if (!SP.isDefinition()) continue;
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
+ if (!NMD) continue;
+ unsigned E = NMD->getNumOperands();
+ if (!E) continue;
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ DeadFnScopeMap[SP] = Scope;
+ for (unsigned I = 0; I != E; ++I) {
+ DIVariable DV(NMD->getOperand(I));
+ if (!DV.Verify()) continue;
+ Scope->addVariable(new DbgVariable(DV));
+ }
+
+ // Construct subprogram DIE and add variables DIEs.
+ constructSubprogramDIE(SP);
+ DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
+
+ // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ if (!N) continue;
+ DIE *NDie = getCompileUnit(N)->getDIE(N);
+ if (!NDie) continue;
+ getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type,
+ dwarf::DW_FORM_ref4, NDie);
+ }
+
+ // Emit end labels for the standard text and data sections.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->OutStreamer.SwitchSection(SectionMap[i]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+ }
+
+ // Compute DIE offsets and sizes.
+ computeSizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ emitDebugInfo();
+
+ // Emit the corresponding abbreviations into the abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug pubnames section.
+ emitDebugPubNames();
+
+ // Emit info into a debug pubtypes section.
+ emitDebugPubTypes();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ emitDebugInlineInfo();
+
+ // Emit info into a debug str section.
+ emitDebugStr();
+
+ // clean up.
+ DeleteContainerSeconds(DeadFnScopeMap);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+ FirstCU = NULL; // Reset for the next Module, if any.
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+ DebugLoc ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ LLVMContext &Ctx = Var->getContext();
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var);
+ Scope->addVariable(AbsDbgVariable);
+ AbstractVariables[Var] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+/// addCurrentFnArgument - If Var is a current function argument then add
+/// it to the CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, DbgScope *Scope) {
+ if (Scope != CurrentFnDbgScope)
+ return false;
+ DIVariable DV = Var->getVariable();
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ return false;
+ unsigned ArgNo = DV.getArgNumber();
+ if (ArgNo == 0)
+ return false;
+
+ size_t Size = CurrentFnArguments.size();
+ if (Size == 0)
+ CurrentFnArguments.resize(MF->getFunction()->arg_size());
+ // llvm::Function's argument count is not a good indicator of how many
+ // arguments the function has at the source level.
+ if (ArgNo > Size)
+ CurrentFnArguments.resize(ArgNo * 2);
+ CurrentFnArguments[ArgNo - 1] = Var;
+ return true;
+}
+
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ Processed.insert(Var);
+ DIVariable DV(Var);
+ const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+ DbgScope *Scope = findDbgScope(VP.second);
+
+ // If variable scope is not found then skip this variable.
+ if (Scope == 0)
+ continue;
+
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV);
+ recordVariableFrameIndex(RegVar, VP.first);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ Scope->addVariable(RegVar);
+ if (AbsDbgVariable) {
+ recordVariableFrameIndex(AbsDbgVariable, VP.first);
+ VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+ }
+ }
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ return MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+ MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
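+
+// Such a DBG_VALUE has the three-operand shape
+//   DBG_VALUE %reg, 0, !variable
+// i.e. a non-zero register, a zero offset immediate, and the DIVariable
+// metadata node as the last operand.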
+
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+ return DotDebugLocEntry();
+}
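+
+// For instance, an indirect "DBG_VALUE %reg, <imm>, !var" becomes an entry
+// whose machine location is the memory address <imm>(%reg), valid from
+// FLabel up to SLabel.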
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+
+ // Collect variable info from the side table maintained by MMI.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ for (SmallVectorImpl<const MDNode*>::const_iterator
+ UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+ ++UVI) {
+ const MDNode *Var = *UVI;
+ if (Processed.count(Var))
+ continue;
+
+ // History contains relevant DBG_VALUE instructions for Var and instructions
+ // clobbering it.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty())
+ continue;
+ const MachineInstr *MInsn = History.front();
+
+ DIVariable DV(Var);
+ DbgScope *Scope = NULL;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ Scope = CurrentFnDbgScope;
+ else
+ Scope = findDbgScope(MInsn->getDebugLoc());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(DV);
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+ DbgVariable *RegVar = new DbgVariable(DV);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ Scope->addVariable(RegVar);
+ if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+ DbgVariableToDbgInstMap[AbsVar] = MInsn;
+ VarToAbstractVarMap[RegVar] = AbsVar;
+ }
+
+ // Simple ranges that are fully coalesced.
+ if (History.size() <= 1 || (History.size() == 2 &&
+ MInsn->isIdenticalTo(History.back()))) {
+ DbgVariableToDbgInstMap[RegVar] = MInsn;
+ continue;
+ }
+
+ // handle multiple DBG_VALUE instructions describing one variable.
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+
+ for (SmallVectorImpl<const MachineInstr*>::const_iterator
+ HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+ const MachineInstr *Begin = *HI;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if DBG_VALUE is truncating a range.
+ if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
+ && !Begin->getOperand(0).getReg())
+ continue;
+
+ // Compute the range for a register location.
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = 0;
+
+ if (HI + 1 == HE)
+ // If Begin is the last instruction in History then its value is valid
+ // until the end of the function.
+ SLabel = FunctionEndSym;
+ else {
+ const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
+ if (End->isDebugValue())
+ SLabel = getLabelBeforeInsn(End);
+ else {
+ // End is a normal instruction clobbering the range.
+ SLabel = getLabelAfterInsn(End);
+ assert(SLabel && "Forgot label after clobber instruction");
+ ++HI;
+ }
+ }
+
+ // The value is valid until the next DBG_VALUE or clobber.
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
+
+ // Collect info for variables that were optimized out.
+ const Function *F = MF->getFunction();
+ if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+ if (!DV || !Processed.insert(DV))
+ continue;
+ DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+}
+
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+ unsigned Flags = DWARF2_FLAG_IS_STMT;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ }
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else
+ recordSourceLine(0, 0, 0, 0);
+ }
+ }
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!MI->isDebugValue())
+ PrevLabel = 0;
+
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsAfterInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// getOrCreateDbgScope - Create DbgScope for the scope.
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock()) {
+ DbgScope *Parent =
+ getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+ } else if (DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+ CurrentFnDbgScope = WScope;
+
+ return WScope;
+ }
+
+ getOrCreateAbstractScope(Scope);
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return WScope;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
+ DbgScope *Parent =
+ getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
+ WScope->setParent(Parent);
+ Parent->addScope(WScope);
+ return WScope;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+ assert (Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<DbgScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ DbgScope *WS = WorkStack.back();
+ const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+ bool visitedChildren = false;
+ for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ DbgScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
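+
+// With these DFS numbers, an ancestor query on the scope tree reduces to an
+// interval check: scope A dominates scope B exactly when
+// DFSIn(A) <= DFSIn(B) and DFSOut(B) <= DFSOut(A). The
+// PrevDbgScope->dominates(S) check in extractScopeInformation relies on this.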
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(const MachineFunction *MF,
+ DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ unsigned PrevDFSIn = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+
+ // Check if instruction has valid location information.
+ DebugLoc MIDL = MInsn->getDebugLoc();
+ if (!MIDL.isUnknown()) {
+ MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+ dbgs() << " [ ";
+ if (InlinedAt)
+ dbgs() << "*";
+ DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+ MI2ScopeMap.find(MInsn);
+ if (DI != MI2ScopeMap.end()) {
+ DbgScope *S = DI->second;
+ dbgs() << S->getDFSIn();
+ PrevDFSIn = S->getDFSIn();
+ } else
+ dbgs() << PrevDFSIn;
+ } else
+ dbgs() << " [ x" << PrevDFSIn;
+ dbgs() << " ]";
+ MInsn->dump();
+ }
+ dbgs() << "\n";
+ }
+#endif
+}
+
+/// extractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Return true if at least one scope was found.
+bool DwarfDebug::extractScopeInformation() {
+ // If scope information was extracted using .dbg intrinsics then there is no
+ // need to extract this information by scanning each instruction.
+ if (!DbgScopeMap.empty())
+ return false;
+
+ // Scan each instruction and create scopes. First build a working set of
+ // scopes.
+ SmallVector<DbgRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+ DebugLoc PrevDL;
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute any instruction to the output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match the scope of the first
+ // instruction in this range, then create a new instruction range.
+ DEBUG(dbgs() << "Creating new instruction range :\n");
+ DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+ DEBUG(dbgs() << "End Range at " << *PrevMI);
+ DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+ DEBUG(dbgs() << "------------------------\n");
+ DbgRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ DbgRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
+ }
+
+ if (!CurrentFnDbgScope)
+ return false;
+
+ calculateDominanceGraph(CurrentFnDbgScope);
+ if (PrintDbgScope)
+ printDbgScopeInfo(Asm->MF, MI2ScopeMap);
+
+ // Find ranges of instructions covered by each DbgScope.
+ DbgScope *PrevDbgScope = NULL;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const DbgRange &R = *RI;
+ DbgScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost DbgScope for a machine instruction!");
+ if (PrevDbgScope && !PrevDbgScope->dominates(S))
+ PrevDbgScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevDbgScope = S;
+ }
+
+ if (PrevDbgScope)
+ PrevDbgScope->closeInsnRange();
+
+ identifyScopeMarkers();
+
+ return !DbgScopeMap.empty();
+}
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has a first and a last instruction that mark the beginning
+/// and the end of the scope, respectively. Create an inverse map that lists
+/// the scopes starting (and ending) with an instruction. One instruction may
+/// start (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+ SmallVector<DbgScope *, 4> WorkList;
+ WorkList.push_back(CurrentFnDbgScope);
+ while (!WorkList.empty()) {
+ DbgScope *S = WorkList.pop_back_val();
+
+ const SmallVector<DbgScope *, 4> &Children = S->getScopes();
+ if (!Children.empty())
+ for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+
+ const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "DbgRange does not have first instruction!");
+ assert(RI->second && "DbgRange does not have second instruction!");
+ requestLabelBeforeInsn(RI->first);
+ requestLabelAfterInsn(RI->second);
+ }
+ }
+}
+
+/// getScopeNode - Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
+ if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+ return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+ return DL.getScope(Ctx);
+}
+
+/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
+/// line number info for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+ const MDNode *Scope = getScopeNode(DL, Ctx);
+ DISubprogram SP = getDISubprogram(Scope);
+ if (SP.Verify())
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ return DebugLoc();
+}
+
+/// beginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo()) return;
+ if (!extractScopeInformation()) return;
+
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+ std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool AtBlockEntry = true;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+
+ if (MI->isDebugValue()) {
+ assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+
+ // Keep track of user variables.
+ const MDNode *Var =
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ // Variable is in a register, we need to check for clobbers.
+ if (isDbgValueInDefinedReg(MI))
+ LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+ // Check the history of this variable.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty()) {
+ UserVariables.push_back(Var);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(Var);
+ if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(getDISubprogram(DV.getContext()))
+ .describes(MF->getFunction()))
+ LabelsBeforeInsn[MI] = FunctionBeginSym;
+ } else {
+ // We have seen this variable before. Try to coalesce DBG_VALUEs.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue()) {
+ // Coalesce identical entries at the end of History.
+ if (History.size() >= 2 &&
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
+ History.pop_back();
+ }
+
+ // Terminate old register assignments that don't reach MI.
+ MachineFunction::const_iterator PrevMBB = Prev->getParent();
+ if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+ isDbgValueInDefinedReg(Prev)) {
+ // Previous register assignment needs to terminate at the end of
+ // its basic block.
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end()) {
+ // Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
+ History.pop_back();
+ }
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ }
+ }
+ History.push_back(MI);
+ } else {
+ // Not a DBG_VALUE instruction.
+ if (!MI->isLabel())
+ AtBlockEntry = false;
+
+ // The first known non-DBG_VALUE location marks the beginning of the
+ // function body.
+ if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+ PrologEndLoc = MI->getDebugLoc();
+
+ // Check if the instruction clobbers any registers with debug vars.
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+ continue;
+ for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ unsigned Reg = *AI; ++AI) {
+ const MDNode *Var = LiveUserVar[Reg];
+ if (!Var)
+ continue;
+ // Reg is now clobbered.
+ LiveUserVar[Reg] = 0;
+
+ // Was Var last defined by a DBG_VALUE referring to Reg?
+ DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+ if (HistI == DbgValues.end())
+ continue;
+ SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ if (History.empty())
+ continue;
+ const MachineInstr *Prev = History.back();
+ // Sanity-check: Register assignments are terminated at the end of
+ // their block.
+ if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+ continue;
+ // Is the variable still in Reg?
+ if (!isDbgValueInDefinedReg(Prev) ||
+ Prev->getOperand(0).getReg() != Reg)
+ continue;
+ // Var is clobbered. Make sure the next instruction gets a label.
+ History.push_back(MI);
+ }
+ }
+ }
+ }
+ }
+
+ for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+ I != E; ++I) {
+ SmallVectorImpl<const MachineInstr*> &History = I->second;
+ if (History.empty())
+ continue;
+
+ // Make sure the final register assignments are terminated.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+ const MachineBasicBlock *PrevMBB = Prev->getParent();
+ MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end())
+ // Drop DBG_VALUE for empty range.
+ History.pop_back();
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ // Request labels for the full history.
+ for (unsigned i = 0, e = History.size(); i != e; ++i) {
+ const MachineInstr *MI = History[i];
+ if (MI->isDebugValue())
+ requestLabelBeforeInsn(MI);
+ else
+ requestLabelAfterInsn(MI);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = FunctionBeginSym;
+
+ // Record beginning of function.
+ if (!PrologEndLoc.isUnknown()) {
+ DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+ MF->getFunction()->getContext());
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ DWARF2_FLAG_IS_STMT);
+ }
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
+
+ if (CurrentFnDbgScope) {
+
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI) {
+ DISubprogram SP((*AI)->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ if (NamedMDNode *NMD =
+ getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+ if (!DV || !ProcessedVars.insert(DV))
+ continue;
+ DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+ }
+ if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+ constructScopeDIE(*AI);
+ }
+
+ DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
+
+ if (!DisableFramePointerElim(*MF))
+ getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE,
+ dwarf::DW_AT_APPLE_omit_frame_ptr,
+ dwarf::DW_FORM_flag, 1);
+
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
+ }
+
+ // Clear debug info
+ CurrentFnDbgScope = NULL;
+ DeleteContainerPointers(CurrentFnArguments);
+ DbgVariableToFrameIndexMap.clear();
+ VarToAbstractVarMap.clear();
+ DbgVariableToDbgInstMap.clear();
+ InlinedDbgScopeMap.clear();
+ DeleteContainerSeconds(DbgScopeMap);
+ UserVariables.clear();
+ DbgValues.clear();
+ DeleteContainerSeconds(AbstractScopes);
+ AbstractScopesList.clear();
+ AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = NULL;
+}
+
+/// recordVariableFrameIndex - Record a variable's index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+ assert (V && "Invalid DbgVariable!");
+ DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if a frame index for the variable
+/// is found, and update FI to hold the value of the index.
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+ assert (V && "Invalid DbgVariable!");
+ DenseMap<const DbgVariable *, int>::iterator I =
+ DbgVariableToFrameIndexMap.find(V);
+ if (I == DbgVariableToFrameIndexMap.end())
+ return false;
+ *FI = I->second;
+ return true;
+}
+
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
+ if (DL.isUnknown())
+ return NULL;
+
+ DbgScope *Scope = NULL;
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ if (MDNode *IA = DL.getInlinedAt(Ctx))
+ Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+ return Scope;
+}
+
+
+/// recordSourceLine - Register a source line with debug info. Returns the
+/// unique label that was emitted and which provides correspondence to
+/// the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags,
+ 0, 0, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If this is not the last sibling and it has children then add a sibling
+ // offset attribute.
+ if (!Last && !Children.empty())
+ Die->addSiblingOffset(DIEValueAllocator);
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of the abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ // Compute size of compile unit header.
+ unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+ }
+}
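+
+// For 32-bit DWARF this fixed header adds up to 11 bytes, so the first DIE
+// of every compile unit is placed at offset 11 within its unit.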
+
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ DwarfInfoSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ DwarfAbbrevSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+ if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+ EmitSectionSym(Asm, MacroInfo);
+
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ DwarfStrSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+ DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
+
+ DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
+ TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ EmitSectionSym(Asm, TLOF.getDataSection());
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ Asm->EmitULEB128(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->getSiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ case dwarf::DW_AT_ranges: {
+ // The DW_AT_ranges value encodes an offset into the debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+ if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
+ break;
+ }
+ case dwarf::DW_AT_location: {
+ if (UseDotDebugLocEntry.count(Die) != 0) {
+ DIELabel *L = cast<DIELabel>(Values[i]);
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j]);
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("End Of Children Mark");
+ Asm->EmitInt8(0);
+ }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+ // Start debug info section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ DIE *Die = TheCU->getCUDie();
+
+ // Emit the compile unit header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+ TheCU->getID()));
+
+ // Emit the size of the content, not including the length field itself.
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+ DwarfAbbrevSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ emitDIE(Die);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ }
+}
+
+/// emitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::emitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAbbrevSection());
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation's code (base-1 index).
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+ }
+}
+
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->OutStreamer.AddComment("Extended Op");
+ Asm->EmitInt8(0);
+
+ Asm->OutStreamer.AddComment("Op size");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+ Asm->OutStreamer.AddComment("DW_LNE_set_address");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+ Asm->OutStreamer.AddComment("Section end label");
+
+ Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+ Asm->getTargetData().getPointerSize(),
+ 0/*AddrSpace*/);
+
+ // Mark end of matrix.
+ Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+ TheCU->getID()));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+ TheCU->getID()));
+ }
+}
+
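+/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+///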
+void DwarfDebug::emitDebugPubTypes() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getID()));
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getID()));
+ }
+}
+
+/// emitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (StringPool.empty()) return;
+
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfStrSection());
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+ for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+ I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &*I));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+ // Emit the string itself.
+ Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ }
+}
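The sort above exists only to recover insertion order from the unordered StringMap: each entry is paired with the index it was assigned when pooled, and sorting on that index puts the strings back in first-use order. A standalone sketch with plain std types (names are made up):

#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

static std::vector<std::string>
orderPoolByIndexSketch(std::vector<std::pair<unsigned, std::string> > Entries) {
  // Pairs compare by the assigned pool index first, so emission order matches
  // the order in which the strings were first requested.
  std::sort(Entries.begin(), Entries.end());
  std::vector<std::string> Ordered;
  Ordered.reserve(Entries.size());
  for (std::size_t i = 0; i != Entries.size(); ++i)
    Ordered.push_back(Entries[i].second);
  return Ordered;
}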
+
+/// emitDebugLoc - Emit visible names into a debug loc section.
+///
+void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ DIVariable DV(Entry.Variable);
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ if (Entry.isInt()) {
+ DIBasicType BTy(DV.getType());
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+ Asm->OutStreamer.AddComment("DW_OP_consts");
+ Asm->EmitInt8(dwarf::DW_OP_consts);
+ Asm->EmitSLEB128(Entry.getInt());
+ } else {
+ Asm->OutStreamer.AddComment("DW_OP_constu");
+ Asm->EmitInt8(dwarf::DW_OP_constu);
+ Asm->EmitULEB128(Entry.getInt());
+ }
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref)
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ else llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
+ }
+ // else ... ignore constant fp. There is no good way to represent
+ // them here in dwarf.
+ Asm->OutStreamer.EmitLabel(end);
+ }
+ }
+}
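The complex-address branch above stitches a DWARF location expression together from register, offset, and dereference pieces. As a standalone illustration of the byte form (helper names are made up; 0x70 and 0x06 are the standard DW_OP_breg0 and DW_OP_deref values), a "value lives at memory address reg + offset" location looks like this:

#include <cstdint>
#include <vector>

// Append an SLEB128-encoded value (the DWARF varint used for signed operands).
static void appendSLEB128Sketch(std::vector<uint8_t> &B, int64_t V) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;  // arithmetic shift on two's-complement targets
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    B.push_back(Byte);
  } while (More);
}

// DW_OP_breg<Reg> <SLEB128 Off>, optionally followed by DW_OP_deref for one
// extra level of indirection. Reg must be a DWARF register number in 0..31
// for the short breg form.
static std::vector<uint8_t> bregLocationSketch(unsigned Reg, int64_t Off,
                                               bool Deref) {
  std::vector<uint8_t> Expr;
  Expr.push_back(uint8_t(0x70 + Reg));  // DW_OP_breg0 + Reg
  appendSLEB128Sketch(Expr, Off);
  if (Deref)
    Expr.push_back(0x06);               // DW_OP_deref
  return Expr;
}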
+
+/// EmitDebugARanges - Emit visible names into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+/// emitDebugRanges - Emit visible names into a debug ranges section.
+///
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ for (SmallVector<const MCSymbol *, 8>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ }
+}
+
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+///
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+/// emitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!FirstCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+ Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+ Asm->OutStreamer.AddComment("Dwarf Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ Asm->OutStreamer.AddComment("MIPS linkage name");
+ if (LName.empty()) {
+ Asm->OutStreamer.EmitBytes(Name, 0);
+ Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
+ } else
+ Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+ DwarfStrSectionSym);
+
+ Asm->OutStreamer.AddComment("Function name");
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(LI->second->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+ Asm->OutStreamer.EmitSymbolValue(LI->first,
+ Asm->getTargetData().getPointerSize(),0);
+ }
+ }
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
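The doc comment above describes the record layout in prose; a rough standalone mirror of one debug_inlined entry as a plain struct (field names and fixed-width choices are illustrative, not LLVM types):

#include <cstdint>
#include <utility>
#include <vector>

struct DebugInlinedEntrySketch {
  uint32_t LinkageNameStrOffset;   // __debug_str offset of the MIPS linkage
                                   // name, or of the plain name if none exists
  uint32_t FunctionNameStrOffset;  // __debug_str offset of the regular name
  // Encoded as a ULEB128 count followed by one {die_offset, low_pc} pair per
  // distinct inlined instance; here simply a vector of pairs.
  std::vector<std::pair<uint32_t, uint64_t> > Instances;
};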
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..b2450064e3d0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,531 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class CompileUnit;
+class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MachineOperand;
+class MCAsmInfo;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ MCSymbol *Label; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+ : Line(L), Column(C), SourceID(S), Label(label) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ MCSymbol *getLabel() const { return Label; }
+};
+
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ const MDNode *Variable;
+ bool Merged;
+ bool Constant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
+ Constant(false) { Constants.Int = 0;}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are referenced to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
+} DotDebugLocEntry;
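Merge() above folds an entry into its successor when the two ranges are adjacent and describe the same location. A standalone sketch of the same rule over plain integer ranges (types and names are illustrative only):

#include <cstddef>
#include <vector>

struct RangeSketch {
  int Begin, End;  // half-open range [Begin, End)
  int Loc;         // stand-in for the machine location
  bool Merged;
};

// Coalesce adjacent ranges with identical locations: [B,E) + [E,E2) -> [B,E2).
// The earlier entry is only marked, mirroring how emission skips entries with
// isMerged() set.
static void mergeAdjacentSketch(std::vector<RangeSketch> &Entries) {
  for (std::size_t i = 0; i + 1 < Entries.size(); ++i) {
    RangeSketch &Cur = Entries[i];
    RangeSketch &Next = Entries[i + 1];
    if (Cur.Loc == Next.Loc && Cur.End == Next.Begin) {
      Next.Begin = Cur.Begin;  // the successor absorbs the earlier range
      Cur.Merged = true;       // skipped later, like isMerged() entries
    }
  }
}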
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+public:
+ // AbsVar may be NULL.
+ DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ unsigned getTag() const { return Var.getTag(); }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const;
+};
+
+class DwarfDebug {
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ CompileUnit *FirstCU;
+ DenseMap<const MDNode *, CompileUnit *> CUMap;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ StringMap<unsigned> SourceIdMap;
+
+ /// StringPool - A String->Symbol mapping of strings used by indirect
+ /// references.
+ StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+ unsigned NextStringPoolNumber;
+
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const MCSection*> SectionMap;
+
+ /// CurrentFnDbgScope - Top level scope for the current function.
+ ///
+ DbgScope *CurrentFnDbgScope;
+
+ /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+ SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
+ /// DbgScopeMap - Tracks the scopes in the current function. Owns the
+ /// contained DbgScope*s.
+ DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
+
+ /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+ DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
+
+ /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+ /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
+ DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+
+ /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// AbstractScopesList - Tracks abstract scopes constructed while processing
+ /// a function. This list is cleared during endFunction().
+ SmallVector<DbgScope *, 4> AbstractScopesList;
+
+ /// AbstractVariables - Collection of abstract variables. Owned by the
+ /// DbgScopes in AbstractScopes.
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ /// DbgVariableToFrameIndexMap - Tracks the frame index used to find a
+ /// variable's value.
+ DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+ /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
+ /// machine instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+ /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ /// UseDotDebugLocEntry - For DIEs in this set, the DW_AT_location attribute
+ /// identifies the corresponding .debug_loc entry offset.
+ SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+ /// VarToAbstractVarMap - Maps a DbgVariable to its corresponding abstract
+ /// DbgVariable, if any.
+ DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
+
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+ /// information is used to populate debug_inlined section.
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+ // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+ // are processed to create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ /// LabelsBeforeInsn - Maps an instruction to the label emitted before
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// LabelsAfterInsn - Maps an instruction to the label emitted after
+ /// that instruction.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
+ /// in order of appearance.
+ SmallVector<const MDNode*, 8> UserVariables;
+
+ /// DbgValues - For each user variable, keep a list of DBG_VALUE
+ /// instructions in order. The list can also contain normal instructions that
+ /// clobber the previous DBG_VALUE.
+ typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+ DbgValueHistoryMap;
+ DbgValueHistoryMap DbgValues;
+
+ SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+ /// Previous instruction's location information. This is used to determine
+ /// label locations that indicate scope boundaries in dwarf debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel;
+
+ /// PrologEndLoc - This location indicates the end of the function prologue
+ /// and the beginning of the function body.
+ DebugLoc PrologEndLoc;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ // Section Symbols: these are assembler temporary labels that are emitted at
+ // the beginning of each supported dwarf section. These are used to form
+ // section offsets and are created by EmitSectionLabels.
+ MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+private:
+
+ /// assignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// getOrCreateDbgScope - Create DbgScope for the scope.
+ DbgScope *getOrCreateDbgScope(DebugLoc DL);
+
+ DbgScope *getOrCreateAbstractScope(const MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+ /// If there are global variables in this scope then create and insert
+ /// DIEs for these variables.
+ DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+
+ /// constructLexicalScope - Construct new DW_TAG_lexical_block
+ /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(DbgScope *Scope);
+
+ /// constructInlinedScopeDIE - This scope represents inlined body of
+ /// a function. Construct DIE to represent this concrete inlined copy
+ /// of the function.
+ DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+
+ /// constructScopeDIE - Construct a DIE for this scope.
+ DIE *constructScopeDIE(DbgScope *Scope);
+
+ /// EmitSectionLabels - Emit initial Dwarf sections with a label at
+ /// the start of each one.
+ void EmitSectionLabels();
+
+ /// emitDIE - Recursively emits a debug information entry.
+ ///
+ void emitDIE(DIE *Die);
+
+ /// computeSizeAndOffset - Compute the size and offset of a DIE.
+ ///
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+
+ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void computeSizeAndOffsets();
+
+ /// emitDebugInfo - Emit the debug info section.
+ ///
+ void emitDebugInfo();
+
+ /// emitAbbreviations - Emit the abbreviation section.
+ ///
+ void emitAbbreviations() const;
+
+ /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void emitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// emitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void emitDebugPubNames();
+
+ /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+ ///
+ void emitDebugPubTypes();
+
+ /// emitDebugStr - Emit visible names into a debug str section.
+ ///
+ void emitDebugStr();
+
+ /// emitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void emitDebugLoc();
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// emitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void emitDebugRanges();
+
+ /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void emitDebugMacInfo();
+
+ /// emitDebugInlineInfo - Emit inline info using the following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+ /// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void emitDebugInlineInfo();
+
+ /// constructCompileUnit - Create new CompileUnit for the given
+ /// metadata node with tag DW_TAG_compile_unit.
+ void constructCompileUnit(const MDNode *N);
+
+ /// getCompileUnit - Get CompileUnit DIE.
+ CompileUnit *getCompileUnit(const MDNode *N) const;
+
+ /// constructGlobalVariableDIE - Construct global variable DIE.
+ void constructGlobalVariableDIE(const MDNode *N);
+
+ /// constructSubprogramDIE - Construct subprogram DIE.
+ void constructSubprogramDIE(const MDNode *N);
+
+ /// recordSourceLine - Register a source line with debug info. Returns the
+ /// unique label that was emitted and which provides correspondence to
+ /// the source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// recordVariableFrameIndex - Record a variable's index.
+ void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+ /// findVariableFrameIndex - Return true if frame index for the variable
+ /// is found. Update FI to hold value of the index.
+ bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+ /// findDbgScope - Find DbgScope for the debug loc.
+ DbgScope *findDbgScope(DebugLoc DL);
+
+ /// identifyScopeMarkers() - Identify instructions that mark the beginning
+ /// or end of a scope.
+ void identifyScopeMarkers();
+
+ /// extractScopeInformation - Scan machine instructions in this function
+ /// and collect DbgScopes. Return true if at least one scope was found.
+ bool extractScopeInformation();
+
+ /// addCurrentFnArgument - If Var is a current function argument, add it to
+ /// the CurrentFnArguments list.
+ bool addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, DbgScope *Scope);
+
+ /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+ /// collectVariableInfoFromMMITable - Collect variable information from
+ /// side table maintained by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &P);
+
+ /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelAfterInsn - Return Label immediately following the instruction.
+ const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug();
+
+ /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule(Module *M);
+
+ /// endModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void endModule();
+
+ /// beginFunction - Gather pre-function debug information. Assumes it is
+ /// being emitted immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF);
+
+ /// endFunction - Gather and emit post-function debug information.
+ ///
+ void endFunction(const MachineFunction *MF);
+
+ /// beginInstruction - Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
+
+ /// endInstruction - Process the end of an instruction.
+ void endInstruction(const MachineInstr *MI);
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+ /// in the SourceIds map.
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP);
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 000000000000..1f992faaadb5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,739 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+ : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
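The FilterOffsets computation above depends only on ULEB128 sizes, so it can be shown in isolation. A standalone sketch (helper names are made up; MCAsmInfo::getULEB128Size is the real counterpart): for filter ids {200, 1, 2} the recorded offsets come out as -1, -3, -4, because 200 needs two ULEB128 bytes.

#include <vector>

// Number of bytes a value occupies in ULEB128 form (7 payload bits per byte).
static unsigned getULEB128SizeSketch(unsigned Value) {
  unsigned Size = 0;
  do {
    ++Size;
    Value >>= 7;
  } while (Value != 0);
  return Size;
}

// Mirror of the FilterOffsets loop above: each filter id records the running
// (negative) byte offset, which then moves back by that id's encoded size.
static std::vector<int>
computeFilterOffsetsSketch(const std::vector<unsigned> &FilterIds) {
  std::vector<int> FilterOffsets;
  FilterOffsets.reserve(FilterIds.size());
  int Offset = -1;
  for (unsigned i = 0, e = FilterIds.size(); i != e; ++i) {
    FilterOffsets.push_back(Offset);
    Offset -= int(getULEB128SizeSketch(FilterIds[i]));
  }
  return FilterOffsets;
}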
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this). If some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->isExceptionHandlingDwarf())
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getTargetData().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g.
+ // ARM EHABI). In this case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(GCCETSym);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()));
+
+ if (IsSJLJ)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+ Asm->getFunctionNumber()));
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does; instead, output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(idx) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
+ llvm::utostr(idx));
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" Action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // DWARF Exception handling
+ assert(Asm->MAI->isExceptionHandlingDwarf());
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (BeginLabel == 0)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (EndLabel == 0)
+ EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+
+ if (!S.PadLabel) {
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ } else {
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel)
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+ llvm::utostr(++Entry) + " <<");
+ if (Action.ValueForTypeID >= 0)
+ Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
+ llvm::utostr(NextAction));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ if (GV)
+ Asm->EmitReference(GV, TTypeEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+ 0);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+
+ Asm->EmitAlignment(2);
+}
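The TotalSize/SizeAlign arithmetic earlier in this function rounds the header-plus-tables size up to a 4-byte boundary so the type infos land aligned, and the padding bytes are folded into whichever ULEB128 length field is emitted next. A standalone sketch of just that arithmetic (the function name is made up):

#include <cassert>

// Pad TotalSize up to the next multiple of 4. The unsigned subtraction wraps
// and masking with 3 leaves 0..3 bytes of padding, exactly as above.
static unsigned lsdaPadToFourSketch(unsigned TotalSize) {
  unsigned SizeAlign = (4 - TotalSize) & 3;
  assert((TotalSize + SizeAlign) % 4 == 0 && "not 4-byte aligned");
  return SizeAlign;
}

// For example, lsdaPadToFourSketch(10) == 2 and lsdaPadToFourSketch(12) == 0.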
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ assert(0 && "Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ assert(0 && "Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ assert(0 && "Should be implemented");
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 000000000000..b5f86ab1b951
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,242 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+protected:
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ /// MMI - Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally, there is an
+ /// index into the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. The type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This table is reverse indexed, base 1.
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// ComputeActionsTable - Compute the actions table and gather the first
+ /// action index for each landing pad site.
+ unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+ /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+ /// try-range address.
+ void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+ void EmitExceptionTable();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(AsmPrinter *A);
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+ /// should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class ARMException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ virtual ~ARMException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class Win64Exception : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if personality
+ /// info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
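
Step 2 of the table description above (check each type id against the thrown exception, follow the next-action offset, stop when the chain terminates with zero) can be illustrated with a small self-contained sketch. The DecodedAction struct and findMatchingTypeID function below are hypothetical stand-ins, not LLVM APIs, and the chain is indexed rather than offset-encoded for simplicity:

  #include <cstdio>
  #include <vector>

  // Hypothetical decoded action record: a type id plus the index of the next
  // record in the chain, with -1 standing in for the terminating zero offset.
  struct DecodedAction {
    int TypeID;
    int Next;
  };

  // Walk the chain starting at First until a record matches ThrownTypeID or
  // the chain ends; a match means the exception is passed to the landing pad.
  static int findMatchingTypeID(const std::vector<DecodedAction> &Actions,
                                int First, int ThrownTypeID) {
    for (int I = First; I != -1; I = Actions[I].Next)
      if (Actions[I].TypeID == ThrownTypeID)
        return Actions[I].TypeID;
    return 0; // no match: the frame is unwound and handling continues
  }

  int main() {
    // Two chained records (hypothetical): catch type 1, then catch type 2.
    std::vector<DecodedAction> Actions = {{1, 1}, {2, -1}};
    std::printf("%d\n", findMatchingTypeID(Actions, 0, 2)); // prints 2
    return 0;
  }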
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..115381767751
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+ MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Too many descriptors for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer.AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer.AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
+ }
+
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
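
To make the frametable layout in the comment above concrete, here is a rough standalone sketch (not LLVM code) of one descriptor and the number of bytes it contributes before the per-descriptor pointer alignment; the 16-bit field widths mirror the limits enforced by the fatal errors above, and all sample values are hypothetical:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // One frame descriptor: return address, frame size, and the live root
  // offsets, following the frametable layout described above.
  struct FrameDescriptor {
    std::uintptr_t ReturnAddress;
    std::uint16_t FrameSize;                // must stay below 1 << 16
    std::vector<std::uint16_t> LiveOffsets; // count must stay below 1 << 16
  };

  // Bytes emitted for one descriptor before alignment:
  // pointer + uint16 FrameSize + uint16 NumLiveOffsets + uint16 per offset.
  static std::size_t emittedSize(const FrameDescriptor &D) {
    return sizeof(std::uintptr_t) + 2 + 2 + 2 * D.LiveOffsets.size();
  }

  int main() {
    FrameDescriptor D = {0x401000, 32, {8, 16, 24}}; // hypothetical values
    std::printf("descriptor bytes: %zu\n", emittedSize(D)); // 8+2+2+6 on LP64
    return 0;
  }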
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
new file mode 100644
index 000000000000..c2ad5eba3b3c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -0,0 +1,116 @@
+//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+Win64Exception::Win64Exception(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
+ {}
+
+Win64Exception::~Win64Exception() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void Win64Exception::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void Win64Exception::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+
+ if (!shouldEmitPersonality)
+ return;
+
+ MCSymbol *GCCHandlerSym =
+ Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
+ Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void Win64Exception::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+
+ Asm->OutStreamer.PushSection();
+ Asm->OutStreamer.EmitWin64EHHandlerData();
+ Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
+ 4);
+ EmitExceptionTable();
+ Asm->OutStreamer.PopSection();
+ }
+ Asm->OutStreamer.EmitWin64EHEndProc();
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..99090a8269d4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1671 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation, it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+ return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ return OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+
+ EnableHoistCommonCode = CommonHoist;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ ImpDefRegs.insert(SubReg);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI == 0) {
+ delete RS;
+ return MadeChange;
+ }
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ delete RS;
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ // Skip debug info so it will not affect codegen.
+ while (I->isDebugValue()) {
+ if (I==MBB->begin())
+ return 0; // MBB empty except for debug info.
+ --I;
+ }
+
+ return HashMachineInstr(I);
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ // Skip debugging pseudos; necessary to avoid changing the code.
+ while (I1->isDebugValue()) {
+ if (I1==MBB1->begin()) {
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin())
+ // I1==DBG at begin; I2==DBG at begin
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+ return TailLen;
+ }
+ --I1;
+ }
+ // I1==first (untested) non-DBG preceding known match
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin()) {
+ ++I1;
+ // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+ return TailLen;
+ }
+ --I2;
+ }
+ // I1, I2==first (untested) non-DBGs preceding known match
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ // Back past possible debugging pseudos at beginning of block. This matters
+ // when one block differs from the other only by whether debugging pseudos
+ // are present at the beginning. (This way, the various checks later for
+ // I1==MBB1->begin() work as expected.)
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+ --I2;
+ while (I2->isDebugValue()) {
+ if (I2 == MBB2->begin()) {
+ return TailLen;
+ }
+ --I2;
+ }
+ ++I2;
+ }
+ if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+ --I1;
+ while (I1->isDebugValue()) {
+ if (I1 == MBB1->begin())
+ return TailLen;
+ --I1;
+ }
+ ++I1;
+ }
+ return TailLen;
+}
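
As a rough illustration of what ComputeCommonTailLength computes (ignoring its debug-value and inline-asm special cases), here is a tiny self-contained sketch that scans two toy instruction lists backwards from their ends; the opcode strings are hypothetical:

  #include <cstdio>
  #include <string>
  #include <vector>

  // Count how many trailing "instructions" two toy blocks share, walking
  // backwards from the ends until the first mismatch.
  static unsigned commonTailLength(const std::vector<std::string> &A,
                                   const std::vector<std::string> &B) {
    unsigned Len = 0;
    std::size_t I = A.size(), J = B.size();
    while (I > 0 && J > 0 && A[I - 1] == B[J - 1]) {
      --I;
      --J;
      ++Len;
    }
    return Len;
  }

  int main() {
    std::vector<std::string> BB1 = {"mul", "add", "store", "ret"};
    std::vector<std::string> BB2 = {"sub", "add", "store", "ret"};
    std::printf("common tail: %u\n", commonTailLength(BB1, BB2)); // prints 3
    return 0;
  }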
+
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls into the part starting at the
+/// iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(&CurMBB, NewMBB);
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall())
+ Time += 10;
+ else if (MCID.mayLoad() || MCID.mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#endif
+ return false;
+ }
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+static bool ProfitableToMerge(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength,
+ unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin()))
+ return true;
+
+ return false;
+}
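
Setting aside the fall-through and whole-block fast paths checked first, the final decision above boils down to a threshold test plus a code-size exception. A stripped-down sketch of just that decision, with hypothetical inputs standing in for the real queries:

  #include <cstdio>

  // Merge if the effective common tail reaches the threshold, or if it is at
  // least 2 instructions, the function is optimized for size, and merging
  // would not require splitting a block.
  static bool worthMerging(unsigned EffectiveTailLen, unsigned MinTailLen,
                           bool OptForSize, bool NoSplitNeeded) {
    if (EffectiveTailLen >= MinTailLen)
      return true;
    return EffectiveTailLen >= 2 && OptForSize && NoSplitNeeded;
  }

  int main() {
    // 2 common instructions, threshold 3, -Os, and one tail is a whole block.
    std::printf("%s\n", worthMerging(2, 3, true, true) ? "merge" : "skip");
    return 0;
  }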
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // If the common tail includes any debug info we will take it pretty
+ // randomly from one of the inputs. Might be better to remove it?
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ bool MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
+ if (I->succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() >= 2) {
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+ // Skip blocks that loop to themselves; we can't tail merge these.
+ if (PBB == IBB)
+ continue;
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ }
+ }
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+ }
+ return MadeChange;
+}
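
The canonicalization described inside TailMergeBlocks above ("Bcc IBB; fallthrough QBB" becomes "Bncc QBB", with only a conceptual branch to IBB left behind) can be sketched with a toy branch representation; the Branch struct and the eq/ne condition pair below are hypothetical, not target opcodes:

  #include <cstdio>
  #include <string>

  // Toy conditional branch: a condition name and an explicit target.
  struct Branch {
    std::string Cond;
    std::string Target;
  };

  // Reverse the condition and retarget the branch at the old fall-through
  // block, leaving a conceptual fall-through into the old target (IBB).
  static Branch canonicalize(const Branch &B, const std::string &FallThrough) {
    Branch R;
    R.Cond = (B.Cond == "eq") ? "ne" : "eq"; // toy condition reversal
    R.Target = FallThrough;
    return R;
  }

  int main() {
    Branch Old = {"eq", "IBB"};            // Beq IBB; fallthrough QBB
    Branch New = canonicalize(Old, "QBB"); // becomes Bne QBB
    std::printf("B%s %s\n", New.Cond.c_str(), New.Target.c_str());
    return 0;
  }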
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ if (MBB->empty())
+ return true;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ return false;
+ }
+ return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator MBBI, MBBE;
+ for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
+ if (!MBBI->isDebugValue())
+ break;
+ }
+ return (MBBI->getDesc().isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+ // If there is a clear successor ordering, we make sure that one block
+ // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ // Neither block consists entirely of debug info (per IsEmptyBlock check),
+ // so we needn't test for falling off the beginning here.
+ MachineBasicBlock::iterator MBB1I = --MBB1->end();
+ while (MBB1I->isDebugValue())
+ --MBB1I;
+ MachineBasicBlock::iterator MBB2I = --MBB2->end();
+ while (MBB2I->isDebugValue())
+ --MBB2I;
+ return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
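
The heuristic above reduces to a single asymmetric test: prefer to fall through into the block that does not end in a call. A minimal sketch, with the call-ending flags supplied as hypothetical inputs rather than queried from real instructions:

  #include <cstdio>

  // True when falling through into block 1 is clearly better than block 2,
  // i.e. block 2 ends in a call and block 1 does not. Asymmetric by design,
  // so it never returns true for both orderings.
  static bool betterFallthrough(bool Block1EndsInCall, bool Block2EndsInCall) {
    return Block2EndsInCall && !Block1EndsInCall;
  }

  int main() {
    std::printf("%d\n", betterFallthrough(false, true)); // prints 1
    std::printf("%d\n", betterFallthrough(true, false)); // prints 0
    return 0;
  }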
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(PrevBBIter))
+ break;
+ MachineInstr *DuplicateDbg = MBBIter;
+ ++MBBIter; --PrevBBIter;
+ DuplicateDbg->eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps with asserts in loops,
+ // moving the assert condition out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither have successors and if PredBB is the second from
+ // last block in the function, we'd just keep swapping the two blocks for
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken()) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (!MBB->empty()) {
+ bool NonDebugInfoFound = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (!I->isDebugValue()) {
+ NonDebugInfoFound = true;
+ break;
+ }
+ }
+ if (!NonDebugInfoFound)
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases.)
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while(PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MF.end());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator,
+/// however if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch; try not to separate the
+ // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && PI->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative: don't insert an instruction above something that may have
+ // side-effects. And since it's potentially bad to separate the flag-setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above a predicated instruction, since it's hard to
+ // reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else {
+ if (Uses.count(Reg)) {
+ Uses.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Uses.erase(*SR); // Use getSubRegisters to be conservative
+ }
+ Defs.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.insert(*AS);
+ }
+ }
+
+ return PI;
+}
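The decision made above, keeping a flag-setting instruction next to the conditional branch that reads it, reduces to a set-intersection test between the branch's uses and the previous instruction's defs. A rough standalone sketch of that test (plain std::set, not MachineOperand scanning) under that reading:

    #include <set>

    // Returns true if the instruction just before the branch defines a
    // register the branch reads (e.g. it sets the flags the branch tests),
    // in which case the hoisting point must move above that instruction.
    static bool branchDependsOnPrev(const std::set<unsigned> &BranchUses,
                                    const std::set<unsigned> &PrevDefs) {
      for (unsigned Reg : PrevDefs)
        if (BranchUses.count(Reg))
          return true;
      return false;
    }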
+
+/// HoistCommonCodeInSuccs - If the successors of MBB share a common
+/// instruction sequence at the start of their blocks, move those instructions
+/// to just before MBB's terminator if it is legal to do so.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor;
+ // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobbered by the
+ // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ // Track local defs so we can update liveins.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (!MO.isDead()) {
+ LocalDefs.push_back(Reg);
+ LocalDefsSet.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ LocalDefsSet.insert(*SR);
+ }
+ } else if (MO.isKill() && LocalDefsSet.count(Reg)) {
+ LocalDefsSet.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ LocalDefsSet.erase(*SR);
+ }
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
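At its core the transformation above is "compute the longest common instruction prefix of the two successors, splice it into the predecessor, and delete it from both successors", with the register-liveness and safety checks layered on top. A simplified standalone sketch of just that core, using strings in place of instructions:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Simplified sketch of prefix hoisting; strings stand in for instructions
    // and none of the safety checks from the real pass are modeled.
    static void hoistCommonPrefix(std::vector<std::string> &Pred,
                                  std::vector<std::string> &TrueSucc,
                                  std::vector<std::string> &FalseSucc) {
      std::size_t N = 0;
      while (N < TrueSucc.size() && N < FalseSucc.size() &&
             TrueSucc[N] == FalseSucc[N])
        ++N;
      // Move the common prefix into the predecessor (the real pass splices it
      // in just before the terminator) and drop it from both successors.
      Pred.insert(Pred.end(), TrueSucc.begin(), TrueSucc.begin() + N);
      TrueSucc.erase(TrueSucc.begin(), TrueSucc.begin() + N);
      FalseSucc.erase(FalseSucc.begin(), FalseSucc.begin() + N);
    }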
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..df795dfc248e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,123 @@
+//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+ class MachineFunction;
+ class MachineModuleInfo;
+ class RegScavenger;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class BranchFolder {
+ public:
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+
+ bool OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi);
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool EnableTailMerge;
+ bool EnableHoistCommonCode;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ RegScavenger *RS;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
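Going only by the declarations in this header, plus the MF.getTarget() accessors used elsewhere in this patch, a machine-function pass would drive the folder roughly as sketched below; how the MachineModuleInfo pointer is obtained is left to the caller and is an assumption here, not something this header specifies.

    #include "BranchFolding.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Hedged sketch of a call site; obtaining MMI is the caller's business.
    static bool runBranchFolding(MachineFunction &MF, MachineModuleInfo *MMI) {
      BranchFolder Folder(/*defaultEnableTailMerge=*/true, /*CommonHoist=*/true);
      return Folder.OptimizeFunction(MF,
                                     MF.getTarget().getInstrInfo(),
                                     MF.getTarget().getRegisterInfo(),
                                     MMI);
    }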
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 000000000000..e6b3bbca2068
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,219 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: "
+ << fn.getFunction()->getName() << '\n');
+
+ LiveIntervals &lis = getAnalysis<LiveIntervals>();
+ VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+ for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg))
+ vrai.CalculateWeightAndHint(li);
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
+ }
+
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hint and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+ }
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = Hint[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && LIS.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
+ }
+ }
+
+ Hint.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength(LIS.getSlotIndexes())) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ bool isLoad = false;
+ if (LIS.isReMaterializable(li, 0, isLoad)) {
+ if (isLoad)
+ totalWeight *= 0.9F;
+ else
+ totalWeight *= 0.5F;
+ }
+
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+}
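For intuition, the adjustments applied after the accumulation loop compose as plain float arithmetic. The sketch below restates only those final factors, taken from the code above; the per-instruction base weights summed into totalWeight (including the 3x boost for likely induction-variable updates) are assumed to have been computed already, since their exact formula lives in LiveIntervals::getSpillWeight rather than in this file.

    // Sketch of the final weight adjustments, mirroring the factors above.
    static float adjustTotalWeight(float totalWeight, bool hasHint,
                                   bool isReMat, bool reMatIsLoad) {
      if (hasHint)
        totalWeight *= 1.01F;                      // weakly prefer hinted regs
      if (isReMat)
        totalWeight *= reMatIsLoad ? 0.9F : 0.5F;  // remat: cheaper to spill
      return totalWeight;  // the real code then normalizes by interval size
    }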
+
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return;
+
+ // Accumulate constraints from all uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ // TRI doesn't have accurate enough information to model this yet.
+ if (I.getOperand().getSubReg())
+ return;
+ // Inline asm instructions don't remember their constraints.
+ if (I->isInlineAsm())
+ return;
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
+ if (OpRC)
+ NewRC = getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return;
+ }
+ DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg)
+ << " to " << NewRC->getName() <<".\n");
+ MRI.setRegClass(reg, NewRC);
+}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 000000000000..14eb0541dc8d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,182 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
+ // No stack is used.
+ StackOffset = 0;
+
+ clearFirstByValReg();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate space on the stack large enough to pass an argument
+// by value. The size and alignment information of the argument is encoded in
+// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ if (MF.getFrameInfo()->getMaxAlignment() < Align)
+ MF.getFrameInfo()->setMaxAlignment(Align);
+ TM.getTargetLowering()->HandleByVal(this, Size);
+ unsigned Offset = AllocateStack(Size, Align);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ unsigned Reg = *Alias; ++Alias)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
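The UsedRegs update above is the usual packed-bitvector idiom: one bit per register, 32 bits per word, indexed by Reg/32 and Reg&31. A tiny self-contained sketch of the same addressing:

    #include <cassert>
    #include <vector>

    // Same bit addressing as UsedRegs[Reg/32] |= 1 << (Reg&31) above.
    static void markReg(std::vector<unsigned> &Bits, unsigned Reg) {
      Bits[Reg / 32] |= 1u << (Reg & 31);
    }
    static bool isRegMarked(const std::vector<unsigned> &Bits, unsigned Reg) {
      return (Bits[Reg / 32] >> (Reg & 31)) & 1u;
    }

    int main() {
      std::vector<unsigned> Bits((100 + 31) / 32);  // room for 100 registers
      markReg(Bits, 37);
      assert(isRegMarked(Bits, 37) && !isRegMarked(Bits, 38));
      return 0;
    }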
+
+/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// AnalyzeReturn - Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << EVT(VT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << EVT(VT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+}
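Every Analyze* loop in this file calls a target-supplied assignment callback as Fn(i, VT, VT, CCValAssign::Full, Flags, *this) and treats a true return as "unhandled". The callback's internals are target-specific and not part of this file, so the sketch below models the usual register-then-stack assignment idea with plain C++ types rather than the LLVM API:

    #include <cstdint>
    #include <vector>

    // Toy model of a calling-convention assignment callback (not LLVM types).
    struct ToyState {
      std::vector<unsigned> FreeRegs;   // argument registers still available
      uint64_t StackOffset = 0;
      struct Loc { bool InReg; unsigned RegOrOffset; };
      std::vector<Loc> Locs;
    };

    // Returns false on success, mirroring the "nonzero means unhandled"
    // convention checked by the Analyze* loops above.
    static bool toyAssign(unsigned ValNo, unsigned SizeInBytes, ToyState &S) {
      (void)ValNo;
      if (!S.FreeRegs.empty()) {
        S.Locs.push_back({true, S.FreeRegs.back()});  // pass in a register
        S.FreeRegs.pop_back();
      } else {
        S.Locs.push_back({false, (unsigned)S.StackOffset});  // spill to stack
        S.StackOffset += SizeInBytes;
      }
      return false;
    }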
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..489746cf3c72
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,59 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRALinScanPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeRenderMachineFunctionPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeLoopSplitterPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
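A tool linking the CodeGen library would normally call this initializer once, before building its pass pipeline. The snippet below is a hedged sketch of that call site; it assumes the usual PassRegistry singleton accessor and header location rather than anything stated in this file.

    #include "llvm/InitializePasses.h"
    #include "llvm/PassRegistry.h"  // assumed header for PassRegistry here

    // Register every CodeGen pass with the global registry (call once).
    static void initCodeGenOnce() {
      llvm::initializeCodeGen(*llvm::PassRegistry::getPassRegistry());
    }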
diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 000000000000..270c337ef67e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,425 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to target-specific alignment boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoopsAligned, "Number of loops aligned");
+STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
+
+namespace {
+ class CodePlacementOpt : public MachineFunctionPass {
+ const MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+
+ public:
+ static char ID;
+ CodePlacementOpt() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const {
+ return "Code Placement Optimizer";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool HasFallthrough(MachineBasicBlock *MBB);
+ bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
+ void Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End);
+ bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L);
+ bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L);
+ bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
+ bool OptimizeIntraLoopEdges(MachineFunction &MF);
+ bool AlignLoops(MachineFunction &MF);
+ bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
+ };
+
+ char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+ return new CodePlacementOpt();
+}
+
+/// HasFallthrough - Test whether the given block has a fallthrough, either as
+/// a plain fallthrough or as the fallthrough case of a conditional branch.
+///
+bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // This conditional branch has no fallthrough.
+ if (FBB)
+ return false;
+ // An unconditional branch has no fallthrough.
+ if (Cond.empty() && TBB)
+ return false;
+ // It has a fallthrough.
+ return true;
+}
+
+/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
+/// This is called before major changes are begun to test whether it will be
+/// possible to complete the changes.
+///
+/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
+/// whenever possible.
+///
+bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
+ // Conservatively ignore EH landing pads.
+ if (MBB->isLandingPad()) return false;
+
+ // Aggressively handle return blocks and similar constructs.
+ if (MBB->succ_empty()) return true;
+
+ // Ask the target's AnalyzeBranch if it can handle this block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ // Make sure the terminator is understood.
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ return false;
+ // Ignore blocks which look like they might have EH-related control flow.
+ // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
+ // recognize the possibility of a control transfer through an unwind.
+ // Such blocks contain EH_LABEL instructions, however they may be in the
+ // middle of the block. Instead of searching for them, just check to see
+ // if the CFG disagrees with AnalyzeBranch.
+ if (1u + !Cond.empty() != MBB->succ_size())
+ return false;
+ // Make sure we have the option of reversing the condition.
+ if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
+ return false;
+ return true;
+}
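The successor-count test above, "1u + !Cond.empty() != MBB->succ_size()", encodes a small counting argument: an analyzable block should have exactly one CFG successor when its terminator is unconditional or a pure fallthrough, and exactly two when it is conditional; anything else suggests hidden (e.g. EH) control flow. A tiny sketch of that expectation:

    #include <cassert>

    // Expected CFG successor count for an analyzable terminator, matching the
    // "1u + !Cond.empty()" expression above.
    static unsigned expectedSuccCount(bool hasCondition) {
      return 1u + (hasCondition ? 1u : 0u);
    }

    int main() {
      assert(expectedSuccCount(false) == 1); // uncond branch or fallthrough only
      assert(expectedSuccCount(true) == 2);  // cond branch plus the other dest
      return 0;
    }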
+
+/// Splice - Move the sequence of instructions [Begin,End) to just before
+/// InsertPt. Update branch instructions as needed to account for broken
+/// fallthrough edges and to take advantage of newly exposed fallthrough
+/// opportunities.
+///
+void CodePlacementOpt::Splice(MachineFunction &MF,
+ MachineFunction::iterator InsertPt,
+ MachineFunction::iterator Begin,
+ MachineFunction::iterator End) {
+ assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
+ "Splice can't change the entry block!");
+ MachineFunction::iterator OldBeginPrior = prior(Begin);
+ MachineFunction::iterator OldEndPrior = prior(End);
+
+ MF.splice(InsertPt, Begin, End);
+
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
+}
+
+/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
+/// to the loop top to the top of the loop so that they have a fall through.
+/// This can introduce a branch on entry to the loop, but it can eliminate a
+/// branch within the loop. See the @simple case in
+/// test/CodeGen/X86/loop_blocks.ll for an example of this.
+bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+
+ bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
+
+ if (TopMBB == MF.begin() ||
+ HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
+ new_top:
+ for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
+ PE = TopMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (Pred == TopMBB) continue;
+ if (HasFallthrough(Pred)) continue;
+ if (!L->contains(Pred)) continue;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (Pred == MF.begin())
+ continue;
+ if (!HasAnalyzableTerminator(Pred))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
+ continue;
+
+ // Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
+ << " to top of loop.\n");
+ Changed = true;
+
+ // Move it and all the blocks that can reach it via fallthrough edges
+ // exclusively, to keep existing fallthrough edges intact.
+ MachineFunction::iterator Begin = Pred;
+ MachineFunction::iterator End = llvm::next(Begin);
+ while (Begin != MF.begin()) {
+ MachineFunction::iterator Prior = prior(Begin);
+ if (Prior == MF.begin())
+ break;
+ // Stop when a non-fallthrough edge is found.
+ if (!HasFallthrough(Prior))
+ break;
+ // Stop if a block which could fall-through out of the loop is found.
+ if (Prior->isSuccessor(End))
+ break;
+ // If we've reached the top, stop scanning.
+ if (Prior == MachineFunction::iterator(TopMBB)) {
+ // We know top currently has a fall through (because we just checked
+ // it) which would be lost if we do the transformation, so it isn't
+ // worthwhile to do the transformation unless it would expose a new
+ // fallthrough edge.
+ if (!Prior->isSuccessor(End))
+ goto next_pred;
+ // Otherwise we can stop scanning and proceed to move the blocks.
+ break;
+ }
+ // If we hit a switch or something complicated, don't move anything
+ // for this predecessor.
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
+ break;
+ // Ok, the block prior to Begin will be moved along with the rest.
+ // Extend the range to include it.
+ Begin = Prior;
+ ++NumIntraMoved;
+ }
+
+ // Move the blocks.
+ Splice(MF, TopMBB, Begin, End);
+
+ // Update TopMBB.
+ TopMBB = L->getTopBlock();
+
+ // We have a new loop top. Iterate on it. We shouldn't have to do this
+ // too many times if BranchFolding has done a reasonable job.
+ goto new_top;
+ next_pred:;
+ }
+ }
+
+ // If the loop previously didn't exit with a fall-through and it now does,
+ // we eliminated a branch.
+ if (Changed &&
+ !BotHasFallthrough &&
+ HasFallthrough(L->getBottomBlock())) {
+ ++NumIntraElim;
+ }
+
+ return Changed;
+}
+
+/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
+/// portion of the loop contiguous with the header. This usually makes the loop
+/// contiguous, provided that AnalyzeBranch can handle all the relevant
+/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
+/// for an example of this.
+bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock *BotMBB = L->getBottomBlock();
+
+ // Determine a position to move orphaned loop blocks to. If TopMBB is not
+ // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
+ // to the top of the loop to avoid losing that fallthrough. Otherwise append
+ // them to the bottom, even if it previously had a fallthrough, on the theory
+ // that it's worth an extra branch to keep the loop contiguous.
+ MachineFunction::iterator InsertPt =
+ llvm::next(MachineFunction::iterator(BotMBB));
+ bool InsertAtTop = false;
+ if (TopMBB != MF.begin() &&
+ !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
+ HasFallthrough(BotMBB)) {
+ InsertPt = TopMBB;
+ InsertAtTop = true;
+ }
+
+ // Keep a record of which blocks are in the portion of the loop contiguous
+ // with the loop header.
+ SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
+ for (MachineFunction::iterator I = TopMBB,
+ E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+ ContiguousBlocks.insert(I);
+
+ // Find non-contiguous blocks and fix them.
+ if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
+ for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ MachineBasicBlock *BB = *BI;
+
+ // Verify that we can analyze all the loop entry edges before beginning
+ // any changes which will require us to be able to analyze them.
+ if (!HasAnalyzableTerminator(BB))
+ continue;
+ if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // If the layout predecessor is part of the loop, this block will be
+ // processed along with it. This keeps them in their relative order.
+ if (BB != MF.begin() &&
+ L->contains(prior(MachineFunction::iterator(BB))))
+ continue;
+
+ // Check to see if this block is already contiguous with the main
+ // portion of the loop.
+ if (!ContiguousBlocks.insert(BB))
+ continue;
+
+ // Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
+ << " to be contiguous with loop.\n");
+ Changed = true;
+
+ // Process this block and all loop blocks contiguous with it, to keep
+ // them in their relative order.
+ MachineFunction::iterator Begin = BB;
+ MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
+ for (; End != MF.end(); ++End) {
+ if (!L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ ContiguousBlocks.insert(End);
+ ++NumIntraMoved;
+ }
+
+ // If we're inserting at the bottom of the loop, and the code we're
+ // moving originally had fall-through successors, bring the successors
+ // up with the loop blocks to preserve the fall-through edges.
+ if (!InsertAtTop)
+ for (; End != MF.end(); ++End) {
+ if (L->contains(End)) break;
+ if (!HasAnalyzableTerminator(End)) break;
+ if (!HasFallthrough(prior(End))) break;
+ }
+
+ // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
+ // we don't need them anymore at this point.
+ Splice(MF, InsertPt, Begin, End);
+ }
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+/// This code takes the approach of making minor changes to the existing
+/// layout to fix specific loop-oriented problems. Also, it depends on
+/// AnalyzeBranch, which can't understand complex control instructions.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Do optimization for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ // Do optimization for this loop.
+ Changed |= EliminateUnconditionalJumpsToTop(MF, L);
+ Changed |= MoveDiscontiguousLoopBlocks(MF, L);
+
+ return Changed;
+}
+
+/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
+/// intra-loop branching and to form contiguous loops.
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
+ bool Changed = false;
+
+ if (!TLI->shouldOptimizeCodePlacement())
+ return Changed;
+
+ // Do optimization for each loop in the function.
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ if (!(*I)->getParentLoop())
+ Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
+
+ return Changed;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return false; // Don't care about loop alignment.
+
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ return Changed;
+}
+
+/// AlignLoop - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+ unsigned Align) {
+ bool Changed = false;
+
+ // Do alignment for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+ Changed = true;
+ ++NumLoopsAligned;
+
+ return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+ MLI = &getAnalysis<MachineLoopInfo>();
+ if (MLI->empty())
+ return false; // No loops.
+
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Changed = OptimizeIntraLoopEdges(MF);
+
+ Changed |= AlignLoops(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 000000000000..84c4d59c0e41
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,663 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+CriticalAntiDepBreaker::
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
+ AntiDepBreaker(), MF(MFi),
+ MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
+ TRI(MF.getTarget().getRegisterInfo()),
+ RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.clear();
+
+ bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
+ // Determine the live-out physregs for this block.
+ if (IsReturnBlock) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prologue.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(BB);
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.clear();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ if (MI->isDebugValue())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
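A standalone sketch of the selection rule above, using toy node and edge types rather than the real SUnit/SDep classes: among a node's predecessors, follow the edge with the greatest predecessor depth plus edge latency, and on a tie prefer an anti-dependence edge.

  #include <cstddef>
  #include <vector>

  struct ToyEdge { std::size_t Pred; unsigned Latency; bool IsAnti; };
  struct ToyNode { unsigned Depth; std::vector<ToyEdge> Preds; };

  // Return the predecessor edge to follow next on the bottom-up critical
  // path, or nullptr if the node has no predecessors.
  static const ToyEdge *criticalPathStep(const std::vector<ToyNode> &Nodes,
                                         const ToyNode &N) {
    const ToyEdge *Next = nullptr;
    unsigned NextDepth = 0;
    for (const ToyEdge &E : N.Preds) {
      unsigned Total = Nodes[E.Pred].Depth + E.Latency;
      if (NextDepth < Total || (NextDepth == Total && E.IsAnti)) {
        NextDepth = Total;
        Next = &E;
      }
    }
    return Next;
  }

  int main() {
    // n0 (depth 0) --latency 2--> n2,  n1 (depth 1) --latency 2, anti--> n2
    std::vector<ToyNode> Nodes(3);
    Nodes[1].Depth = 1;
    Nodes[2].Preds = {{0, 2, false}, {1, 2, true}};
    const ToyEdge *E = criticalPathStep(Nodes, Nodes[2]);
    // Picks the edge from n1: 1 + 2 = 3 beats 0 + 2 = 2.
    return (E && E->Pred == 1) ? 0 : 1;
  }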
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands
+ // that have special allocation requirements. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = 0;
+
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ if (MO.isUse() && Special) {
+ if (KeepRegs.insert(Reg)) {
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ KeepRegs.insert(*Subreg);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to
+ // two-address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = 0;
+ if (i < MI->getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // It wasn't previously live but now it is; this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the anti-dependent register and
+// that must be replaced by NewReg. Return true if any of their parent
+// instructions may clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/post-increment loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC)
+{
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instruction that defines AntiDepReg also defines NewReg, it's
+ // not suitable. For example, an instruction with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!RegClassInfo.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.count(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
+ // If this instruction's defs have special allocation requirements, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
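The long comment in BreakAntiDependencies above motivates tracking LastNewReg so that consecutive repairs of anti-dependences on the same register do not all pick the same replacement. A standalone sketch of that heuristic, with a toy register pool of plain integers rather than real target registers:

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned AntiDepReg = 0;        // "A" in the comment above
    std::vector<unsigned> Pool = {1, 2};  // free candidates, "B" and "C"
    unsigned LastNewReg = 0;              // 0 == no previous repair

    for (int Repair = 0; Repair < 4; ++Repair) {
      unsigned NewReg = 0;
      for (unsigned Cand : Pool) {
        if (Cand == AntiDepReg || Cand == LastNewReg)
          continue;                       // never reuse the previous choice
        NewReg = Cand;
        break;
      }
      std::printf("repair %d: rename r%u -> r%u\n", Repair, AntiDepReg, NewReg);
      LastNewReg = NewReg;                // remember for the next repair
    }
    // The renames alternate r0 -> r1, r0 -> r2, r0 -> r1, r0 -> r2, so no two
    // consecutive repairs recreate the hazard on the same replacement register.
    return 0;
  }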
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 000000000000..07107802972d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,110 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during the post-RA scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+ class CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ std::vector<const TargetRegisterClass*> Classes;
+
+ /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom-up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ SmallSet<unsigned, 4> KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ ~CriticalAntiDepBreaker();
+
+ /// Start - Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+ /// Finish - Finish anti-dep breaking for a basic block.
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC);
+ };
+}
+
+#endif
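A toy model of the KillIndices/DefIndices invariant documented above and asserted throughout CriticalAntiDepBreaker.cpp: scanning a block bottom-up, exactly one of the two indices is ~0u for every register; a live register carries a kill index, a dead one carries a def index. This is an illustration-only class, not the real breaker.

  #include <cassert>
  #include <vector>

  struct ToyLiveness {
    std::vector<unsigned> KillIndices, DefIndices;
    // Initially nothing is live: every kill index is ~0u and every def index
    // holds a valid sentinel (the block size in the real pass, 0 here).
    explicit ToyLiveness(unsigned NumRegs)
        : KillIndices(NumRegs, ~0u), DefIndices(NumRegs, 0) {}

    // Bottom-up, the first use seen of a dead register is its kill.
    void use(unsigned Reg, unsigned Count) {
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      check(Reg);
    }

    // Bottom-up, a def ends the live range that the kill opened.
    void def(unsigned Reg, unsigned Count) {
      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      check(Reg);
    }

    void check(unsigned Reg) const {
      assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
  };

  int main() {
    ToyLiveness L(4);
    L.use(3, 7);  // instruction 7 kills r3 (seen first when walking upwards)
    L.def(3, 2);  // instruction 2 defines r3; r3 is dead above this point
    return 0;
  }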
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..6de6c0cb81bd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,201 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-dce"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false)
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+ return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm without side effects and with no defs can
+ // still be deleted. But there is so much bad inline asm code out there that
+ // we should leave it alone.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+ LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Treat reserved registers as always live.
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = ReservedRegs;
+
+ // Also add any explicit live-out physregs for this block.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+ LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+ unsigned Reg = *LOI;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ LivePhysRegs.set(Reg);
+ }
+
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ assert(Use.isDebug());
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+ AnyChanges = true;
+ MI->eraseFromParent();
+ ++NumDeletes;
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs)
+ LivePhysRegs.reset(*SubRegs);
+ }
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.set(Reg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ *AliasSet; ++AliasSet)
+ LivePhysRegs.set(*AliasSet);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
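A standalone sketch of the bottom-up scan above, on a toy instruction form rather than MachineInstr: walking in reverse, an instruction whose result has no live use below it is deleted; otherwise its def is cleared from the live set before its uses are added, the same defs-before-uses ordering the pass is careful about for physregs.

  #include <cstdio>
  #include <set>
  #include <vector>

  // Each toy instruction defines one register and uses zero or more others.
  struct ToyInst { unsigned Def; std::vector<unsigned> Uses; };

  int main() {
    // r1 = ...        <- dead: nothing below reads r1
    // r2 = ...
    // r0 = use r2     <- r0 is live out of the block
    std::vector<ToyInst> Block = {{1, {}}, {2, {}}, {0, {2}}};
    std::set<unsigned> Live = {0};

    for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
      if (!Live.count(I->Def)) {                     // no live use below
        std::printf("deleting dead def of r%u\n", I->Def);
        continue;
      }
      Live.erase(I->Def);                            // record the def first,
      Live.insert(I->Uses.begin(), I->Uses.end());   // then the uses
    }
    // Prints only "deleting dead def of r1"; r2's def is kept because the
    // bottom instruction uses it.
    return 0;
  }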
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 000000000000..03604b0a170f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,685 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // The eh.selector intrinsic.
+ Function *SelectorIntrinsic;
+
+ // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
+ Constant *URoR;
+
+ // The EH language-specific catch-all type.
+ GlobalVariable *EHCatchAllValue;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // We both use and preserve dominator info.
+ DominatorTree *DT;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ bool NormalizeLandingPads();
+ bool LowerUnwindsAndResumes();
+ bool MoveExceptionValueCalls();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+
+ /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+ /// use the "llvm.eh.catch.all.value" call need to be converted to use its
+ /// initializer instead.
+ bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ bool HasCatchAllInSelector(IntrinsicInst *);
+
+ /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
+
+ /// FindAllURoRInvokes - Find all URoR invokes in the function.
+ void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+ /// "_Unwind_SjLj_Resume" calls. The "unwind" part of such an invoke jumps to
+ /// a landing pad within the current function. This makes it a candidate for
+ /// merging the selector associated with the URoR invoke with the one from
+ /// the URoR's landing pad.
+ bool HandleURoRInvokes();
+
+ /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+ /// with the eh.exception call. This recursively looks past instructions
+ /// which don't change the EH pointer value, like casts or PHI nodes.
+ bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+ ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need the dominator tree for handling URoR.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
+}
+
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+ if (!EHCatchAllValue) return false;
+
+ unsigned ArgIdx = II->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+ return GV == EHCatchAllValue;
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
+ for (Value::use_iterator
+ I = SelectorIntrinsic->use_begin(),
+ E = SelectorIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *II = cast<IntrinsicInst>(*I);
+
+ if (II->getParent()->getParent() != F)
+ continue;
+
+ if (!HasCatchAllInSelector(II))
+ Sels.insert(II);
+ else
+ CatchAllSels.insert(II);
+ }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+ for (Value::use_iterator
+ I = URoR->use_begin(),
+ E = URoR->use_end(); I != E; ++I) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
+ URoRInvokes.insert(II);
+ }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
+/// the "llvm.eh.catch.all.value" call need to be converted to use its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+ if (!EHCatchAllValue) return false;
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ bool Changed = false;
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+ IntrinsicInst *Sel = *I;
+
+ // Index of the "llvm.eh.catch.all.value" variable.
+ unsigned OpIdx = Sel->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
+ if (GV != EHCatchAllValue) continue;
+ Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call, and indicate whether there is a URoR "invoke" associated
+/// with the eh.exception call. This recursively looks past instructions which
+/// don't change the EH pointer value, like casts or PHI nodes.
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs) {
+ bool Changed = false;
+
+ for (Value::use_iterator
+ I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+ Instruction *II = dyn_cast<Instruction>(*I);
+ if (!II || II->getParent()->getParent() != F) continue;
+
+ if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+ if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+ SelCalls.insert(Sel);
+ } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+ if (Invoke->getCalledFunction() == URoR)
+ URoRInvoke = true;
+ } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+ Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
+ } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+ if (SeenPHIs.insert(PN))
+ // Don't process a PHI node more than once.
+ Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
+ }
+ }
+
+ return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of such an invoke jumps to a
+/// landing pad within the current function. This makes it a candidate for
+/// merging the selector associated with the URoR invoke with the one from the
+/// URoR's landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+ if (!EHCatchAllValue) {
+ EHCatchAllValue =
+ F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
+ if (!EHCatchAllValue) return false;
+ }
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+ FindAllCleanupSelectors(Sels, CatchAllSels);
+
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+ if (!URoR) return CleanupSelectors(CatchAllSels);
+ }
+
+ SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+ FindAllURoRInvokes(URoRInvokes);
+
+ SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+ const BasicBlock *SelBB = (*SI)->getParent();
+ for (SmallPtrSet<InvokeInst*, 32>::iterator
+ UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+ const BasicBlock *URoRBB = (*UI)->getParent();
+ if (DT->dominates(SelBB, URoRBB)) {
+ SelsToConvert.insert(*SI);
+ break;
+ }
+ }
+ }
+
+ bool Changed = false;
+
+ if (Sels.size() != SelsToConvert.size()) {
+ // If we haven't been able to convert all of the clean-up selectors, then
+ // loop through the slow way to see if they still need to be converted.
+ if (!ExceptionValueIntrinsic) {
+ ExceptionValueIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+ if (!ExceptionValueIntrinsic)
+ return CleanupSelectors(CatchAllSels);
+ }
+
+ for (Value::use_iterator
+ I = ExceptionValueIntrinsic->use_begin(),
+ E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
+ if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+ bool URoRInvoke = false;
+ SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+ SmallPtrSet<PHINode*, 32> SeenPHIs;
+ Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
+
+ if (URoRInvoke) {
+ // This EH pointer is being used by an invoke of an URoR instruction and
+ // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
+ // need to convert it to a 'catch-all'.
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+ if (!HasCatchAllInSelector(*SI))
+ SelsToConvert.insert(*SI);
+ }
+ }
+ }
+
+ if (!SelsToConvert.empty()) {
+ // Convert all clean-up eh.selectors, which are associated with "invokes" of
+ // URoR calls, into catch-all eh.selectors.
+ Changed = true;
+
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+ SI != SE; ++SI) {
+ IntrinsicInst *II = *SI;
+
+ // Use the exception object pointer and the personality function
+ // from the original selector.
+ CallSite CS(II);
+ IntrinsicInst::op_iterator I = CS.arg_begin();
+ IntrinsicInst::op_iterator E = CS.arg_end();
+ IntrinsicInst::op_iterator B = prior(E);
+
+ // Exclude last argument if it is an integer.
+ if (isa<ConstantInt>(B)) E = B;
+
+ // Add exception object pointer (front).
+ // Add personality function (next).
+ // Add in any filter IDs (rest).
+ SmallVector<Value*, 8> Args(I, E);
+
+ Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+ CallInst *NewSelector =
+ CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
+
+ NewSelector->setTailCall(II->isTailCall());
+ NewSelector->setAttributes(II->getAttributes());
+ NewSelector->setCallingConv(II->getCallingConv());
+
+ II->replaceAllUsesWith(NewSelector);
+ II->eraseFromParent();
+ }
+ }
+
+ Changed |= CleanupSelectors(CatchAllSels);
+ return Changed;
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+ bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ bool SwitchOK = usingSjLjEH;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ // The SjLj dispatch block uses a switch instruction. This is effectively
+ // an unwind edge, so we can disregard it here. There will only ever
+ // be one dispatch, however, so if there are multiple switches, one
+ // of them truly is a normal edge, not an unwind edge.
+ if (SwitchOK && isa<SwitchInst>(PT)) {
+ SwitchOK = false;
+ continue;
+ }
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+ LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(),
+ PN->getNumIncomingValues(),
+ PN->getName()+".unwind", NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree analysis information.
+ DT->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwindsAndResumes - Turn unwind instructions and eh.resume calls into
+/// calls to _Unwind_Resume, rethrowing any previously caught exception. This
+/// will crash horribly at runtime if there is no such exception: using unwind
+/// to throw a new exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwindsAndResumes() {
+ SmallVector<Instruction*, 16> ResumeInsts;
+
+ for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
+ for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
+ if (isa<UnwindInst>(bi))
+ ResumeInsts.push_back(bi);
+ else if (CallInst *call = dyn_cast<CallInst>(bi))
+ if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
+ if (fn->getName() == "llvm.eh.resume")
+ ResumeInsts.push_back(bi);
+ }
+ }
+
+ if (ResumeInsts.empty()) return false;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = ResumeInsts[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ bool Changed = false;
+
+ for (SmallVectorImpl<Instruction*>::iterator
+ I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
+ Instruction *RI = *I;
+
+ // Replace the resuming instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent).
+
+ llvm::Value *ExnValue;
+ if (isa<UnwindInst>(RI))
+ ExnValue = CreateExceptionValueCall(RI->getParent());
+ else
+ ExnValue = cast<CallInst>(RI)->getArgOperand(0);
+
+ // Create the call...
+ CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // ...followed by an UnreachableInst, if it was an unwind.
+ // Calls to llvm.eh.resume are typically already followed by this.
+ if (isa<UnwindInst>(RI))
+ new UnreachableInst(RI->getContext(), RI);
+
+ if (isa<UnwindInst>(RI))
+ ++NumUnwindsLowered;
+ else
+ ++NumResumesLowered;
+
+ // Nuke the resume instruction.
+ RI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register. Also, move eh.exception calls
+/// inside landing pads to the start of the landing pad (optional, but may make
+/// things simpler for later passes).
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ // Move calls to eh.exception that are inside a landing pad to the start of
+ // the landing pad.
+ for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ BasicBlock *LP = *LI;
+ for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception.
+ if (!EI->use_empty()) {
+ // If there is already a call to eh.exception at the start of the
+ // landing pad, then get hold of it; otherwise create such a call.
+ Value *CallAtStart = CreateExceptionValueCall(LP);
+
+ // If the call was at the start of a landing pad then leave it alone.
+ if (EI == CallAtStart)
+ continue;
+ EI->replaceAllUsesWith(CallAtStart);
+ }
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ // Look for calls to eh.exception that are not in a landing pad. If one is
+ // found, then a register that holds the exception value will be created in
+ // each landing pad, and the SSAUpdater will be used to compute the values
+ // returned by eh.exception calls outside of landing pads.
+ SSAUpdater SSA;
+
+ // Remember where we found the eh.exception call, to avoid rescanning earlier
+ // basic blocks which we already know contain no eh.exception calls.
+ bool FoundCallOutsideLandingPad = false;
+ Function::iterator BB = F->begin();
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II)
+ if (isa<EHExceptionInst>(II)) {
+ SSA.Initialize(II->getType(), II->getName());
+ FoundCallOutsideLandingPad = true;
+ break;
+ }
+
+ if (FoundCallOutsideLandingPad)
+ break;
+ }
+
+ // If all calls to eh.exception are in landing pads then we are done.
+ if (!FoundCallOutsideLandingPad)
+ return Changed;
+
+ // Add a call to eh.exception at the start of each landing pad, and tell the
+ // SSAUpdater that this is the value produced by the landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI)
+ SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+ // Now turn all calls to eh.exception that are not in a landing pad into a use
+ // of the appropriate register.
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception, replace it with the value from any
+ // upstream landing pad(s).
+ EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ }
+ }
+
+ return true;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHIOrDbg();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = &getAnalysis<DominatorTree>();
+ F = &Fn;
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions and eh.resume calls into libcalls.
+ Changed |= LowerUnwindsAndResumes();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ Changed |= HandleURoRInvokes();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h
new file mode 100644
index 000000000000..5b634682cc87
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELF.h
@@ -0,0 +1,227 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+ // Version 1.2, May 1995. The ELF64 details are based on the HP/Intel
+ // definition of the ELF-64 object file format, Version 1.5, Draft 2,
+ // May 27, 1998.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+ class GlobalValue;
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+
+ // An ELF symbol is related to an LLVM value through one of the two
+ // pointers below; for the other kinds (section, file, func) a null
+ // pointer is assumed by default.
+ union {
+ const GlobalValue *GV; // If this is a pointer to a GV
+ const char *Ext; // If this is a pointer to a named symbol
+ } Source;
+
+ // Describes which kind of source this ELF symbol comes from: a
+ // GlobalValue, an external symbol, or neither.
+ enum {
+ isGV, // The Source.GV field is valid.
+ isExtSym, // The Source.Ext field is valid.
+ isOther // Not a GlobalValue or External Symbol
+ };
+ unsigned SourceType;
+
+ bool isGlobalValue() const { return SourceType == isGV; }
+ bool isExternalSym() const { return SourceType == isExtSym; }
+
+ // getGlobalValue - If this is a global value which originated the
+ // ELF symbol, return it.
+ const GlobalValue *getGlobalValue() const {
+ assert(SourceType == isGV && "This is not a global value");
+ return Source.GV;
+ }
+
+ // getExternalSymbol - If this is an external symbol which originated the
+ // ELF symbol, return it.
+ const char *getExternalSymbol() const {
+ assert(SourceType == isExtSym && "This is not an external symbol");
+ return Source.Ext;
+ }
+
+ // getGV - Create and return an ELF symbol to represent the given global value
+ static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
+ unsigned Type, unsigned Visibility) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(Type);
+ Sym->setVisibility(Visibility);
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // getExtSym - Create and return an ELF symbol to represent an
+ // external symbol
+ static ELFSym *getExtSym(const char *Ext) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.Ext = Ext;
+ Sym->setBind(ELF::STB_GLOBAL);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SourceType = isExtSym;
+ return Sym;
+ }
+
+ // getSectionSym - Returns an ELF symbol to represent an ELF section
+ static ELFSym *getSectionSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_SECTION);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getFileSym - Returns an ELF symbol to represent the module identifier
+ static ELFSym *getFileSym() {
+ ELFSym *Sym = new ELFSym();
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_FILE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
+ Sym->SourceType = isOther;
+ return Sym;
+ }
+
+ // getUndefGV - Returns an STT_NOTYPE symbol for an undefined global value
+ static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
+ ELFSym *Sym = new ELFSym();
+ Sym->Source.GV = GV;
+ Sym->setBind(Bind);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
+ Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
+ Sym->SourceType = isGV;
+ return Sym;
+ }
+
+ // ELF specific fields
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ uint8_t Info;
+ uint8_t Other;
+ unsigned short SectionIdx;
+
+ // Symbol index into the Symbol table
+ unsigned SymTabIdx;
+
+ ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
+ Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
+ SymTabIdx(0) {}
+
+ unsigned getBind() const { return (Info >> 4) & 0xf; }
+ unsigned getType() const { return Info & 0xf; }
+ bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
+ bool isFileType() const { return getType() == ELF::STT_FILE; }
+
+ void setBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+
+ void setType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+
+ void setVisibility(unsigned V) {
+ assert(V == (V & 0x3) && "Visibility value out of range!");
+ Other = V;
+ }
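+
+ // A minimal worked example of the Info packing above (values from the ELF
+ // spec, shown for illustration only): for a global function symbol,
+ //   setBind(ELF::STB_GLOBAL);   // STB_GLOBAL == 1
+ //   setType(ELF::STT_FUNC);     // STT_FUNC   == 2
+ // leaves Info == (1 << 4) | 2 == 0x12, which matches the standard st_info
+ // encoding ELF{32,64}_ST_INFO(bind, type) == (bind << 4) | (type & 0xf).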
+ };
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ class ELFSection : public BinaryObject {
+ public:
+ // ELF specific fields
+ unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
+ unsigned Type; // sh_type - Section contents & semantics
+ unsigned Flags; // sh_flags - Section flags.
+ uint64_t Addr; // sh_addr - The mem addr this section is in.
+ unsigned Offset; // sh_offset - Offset from the file start
+ unsigned Size; // sh_size - The section size.
+ unsigned Link; // sh_link - Section header table index link.
+ unsigned Info; // sh_info - Auxiliary information.
+ unsigned Align; // sh_addralign - Alignment of section.
+ unsigned EntSize; // sh_entsize - Size of each entry in the section, if any
+
+ /// SectionIdx - The number of the section in the Section Table.
+ unsigned short SectionIdx;
+
+ /// Sym - The symbol to represent this section if it has one.
+ ELFSym *Sym;
+
+ /// getSymbolTableIndex - Returns the symbol table index of the symbol
+ /// representing this section.
+ unsigned getSymbolTableIndex() const {
+ assert(Sym && "section not present in the symbol table");
+ return Sym->SymTabIdx;
+ }
+
+ ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+ : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+ Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
+ };
+
+ /// ELFRelocation - This class contains all the information necessary
+ /// to generate any 32-bit or 64-bit ELF relocation entry.
+ class ELFRelocation {
+ uint64_t r_offset; // offset in the section of the object this applies to
+ uint32_t r_symidx; // symbol table index of the symbol to use
+ uint32_t r_type; // machine specific relocation type
+ int64_t r_add; // explicit relocation addend
+ bool r_rela; // if true then the addend is part of the entry
+ // otherwise the addend is at the location specified
+ // by r_offset
+ public:
+ uint64_t getInfo(bool is64Bit) const {
+ if (is64Bit)
+ return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
+ else
+ return (r_symidx << 8) + (r_type & 0xFFL);
+ }
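+
+ // Worked example for getInfo above (illustrative values): for symbol table
+ // index 5 and relocation type 2 (e.g. R_X86_64_PC32 on x86-64), the packed
+ // r_info field is
+ //   64-bit: (5 << 32) | 2 == 0x0000000500000002   (ELF64_R_INFO)
+ //   32-bit: (5 <<  8) | 2 == 0x502                (ELF32_R_INFO)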
+
+ uint64_t getOffset() const { return r_offset; }
+ int64_t getAddend() const { return r_add; }
+
+ ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
+ bool rela = true, int64_t addend = 0) :
+ r_offset(off), r_symidx(sym), r_type(type),
+ r_add(addend), r_rela(rela) {}
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
new file mode 100644
index 000000000000..3fb087c5ea8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp
@@ -0,0 +1,205 @@
+//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfce"
+
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "processing function: "
+ << MF.getFunction()->getName() << "\n");
+
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getTextSection(MF.getFunction());
+
+ // Set the desired binary object to be used by the code emitters
+ setBinaryObject(ES);
+
+ // Get the function alignment in bytes
+ unsigned Align = (1 << MF.getAlignment());
+
+ // The function must start on its required alignment
+ ES->emitAlignment(Align);
+
+ // Update the section alignment if needed.
+ ES->Align = std::max(ES->Align, Align);
+
+ // Record the function start offset
+ FnStartOff = ES->getCurrentPCOffset();
+
+ // Emit constant pool and jump tables to their appropriate sections.
+ // They need to be emitted before the function because on some targets
+ // the latter may reference a JT or CP entry address.
+ emitConstantPool(MF.getConstantPool());
+ if (MF.getJumpTableInfo())
+ emitJumpTables(MF.getJumpTableInfo());
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Add a symbol to represent the function.
+ const Function *F = MF.getFunction();
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
+ EW.getGlobalELFVisibility(F));
+ FnSym->SectionIdx = ES->SectionIdx;
+ FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+ EW.AddPendingGlobalSymbol(F, true);
+
+ // Offset from start of Section
+ FnSym->Value = FnStartOff;
+
+ if (!F->hasPrivateLinkage())
+ EW.SymbolList.push_back(FnSym);
+
+ // Patch up Jump Table Section relocations to use the real MBB offsets
+ // now that the MBB label offsets inside the function are known.
+ if (MF.getJumpTableInfo()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+ MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+ uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setResultPointer((void*)MBBOffset);
+ MR.setConstantVal(ES->SectionIdx);
+ JTSection.addRelocation(MR);
+ }
+ }
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pool entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+ if (MR.isGlobalValue()) {
+ EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+ } else if (MR.isExternalSymbol()) {
+ EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+ } else if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(ES->SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(JTSection.SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else {
+ llvm_unreachable("Unhandled relocation type");
+ }
+ ES->addRelocation(MR);
+ }
+
+ // Clear per-function data structures.
+ JTRelocations.clear();
+ Relocations.clear();
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // TODO: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf constant pools!");
+
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+
+ // Record the constant pool location and the section index
+ ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+ CPLocations.push_back(CstPool.size());
+ CPSections.push_back(CstPool.SectionIdx);
+
+ if (CPE.isMachineConstantPoolEntry())
+ assert("CPE.isMachineConstantPoolEntry not supported yet");
+
+ // Emit the constant to constant pool section
+ EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf jump tables!");
+
+ const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+ unsigned EntrySize = 4; //MJTI->getEntrySize();
+
+ // Get the ELF Section to emit the jump table
+ ELFSection &JTSection = EW.getJumpTableSection();
+
+ // For each JT, record its offset from the start of the section
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+ // Record JT 'i' offset in the JT section
+ JTLocations.push_back(JTSection.size());
+
+ // Each MBB entry in the Jump table section has a relocation entry
+ // against the current text section.
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+ MachineRelocation MR =
+ MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+ // Add the relocation to the Jump Table section
+ JTRelocations.push_back(MR);
+
+ // Output placeholder for MBB in the JT section
+ for (unsigned s=0; s < EntrySize; ++s)
+ JTSection.emitByte(0);
+ }
+ }
+}
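+
+ // Worked example (assumed sizes): a jump table with three MBB targets adds
+ // 3 * EntrySize == 12 zero bytes to the jump table section together with
+ // three absolute-label relocations; finishFunction() later rewrites each of
+ // these relocations with the real MBB offset inside the text section before
+ // handing them to the jump table section.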
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 000000000000..2ec1f6e873d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+ class ELFWriter;
+ class ELFSection;
+
+ /// ELFCodeEmitter - This class is used by the ELFWriter to
+ /// emit the code for functions to the ELF file.
+ class ELFCodeEmitter : public ObjectCodeEmitter {
+ ELFWriter &EW;
+
+ /// Target machine description
+ TargetMachine &TM;
+
+ /// Section containing code for functions
+ ELFSection *ES;
+
+ /// Relocations - Record relocations needed by the current function
+ std::vector<MachineRelocation> Relocations;
+
+ /// JTRelocations - Record relocations needed by the jump table
+ /// section.
+ std::vector<MachineRelocation> JTRelocations;
+
+ /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+ uintptr_t FnStartOff;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+ /// addRelocation - Register new relocations for this function
+ void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ /// emitConstantPool - For each constant pool entry, figure out which
+ /// section the constant should live in and emit data to it
+ void emitConstantPool(MachineConstantPool *MCP);
+
+ /// emitJumpTables - Emit all the jump tables for a given jump table
+ /// info and record them to the appropriate section.
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ /// emitLabel - Emits a label
+ virtual void emitLabel(MCSymbol *Label) {
+ assert("emitLabel not implemented");
+ }
+
+ /// getLabelAddress - Return the address of the specified label,
+ /// only usable after the label has been emitted.
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert("getLabelAddress not implemented");
+ return 0;
+ }
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 000000000000..d977651c32f7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1105 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(ID), O(o), TM(tm),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
+ TLOF(TM.getTargetLowering()->getObjFileLowering()),
+ is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+ isLittleEndian(TM.getTargetData()->isLittleEndian()),
+ ElfHdr(isLittleEndian, is64Bit) {
+
+ MAI = TM.getMCAsmInfo();
+ TEW = TM.getELFWriterInfo();
+
+ // Create the object code emitter object for this target.
+ ElfCE = new ELFCodeEmitter(*this);
+
+ // Initial number of sections
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete ElfCE;
+ delete &OutContext;
+
+ while(!SymbolList.empty()) {
+ delete SymbolList.back();
+ SymbolList.pop_back();
+ }
+
+ while(!PrivateSyms.empty()) {
+ delete PrivateSyms.back();
+ PrivateSyms.pop_back();
+ }
+
+ while(!SectionList.empty()) {
+ delete SectionList.back();
+ SectionList.pop_back();
+ }
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // ELF Header
+ // ----------
+ // The e_shnum and e_shstrndx fields are only known after all sections
+ // have been emitted. Their locations in the output buffer are recorded
+ // so they can be patched up later.
+ //
+ // Note
+ // ----
+ // The emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
+
+ ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+ ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
+ ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
+ ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
+
+ ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+ ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
+ ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
+
+ ElfHdr.emitWord16(ELF::ET_REL); // e_type
+ ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+ ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
+ ElfHdr.emitWord(0); // e_entry, no entry point in .o file
+ ElfHdr.emitWord(0); // e_phoff, no program header for .o
+ ELFHdr_e_shoff_Offset = ElfHdr.size();
+ ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
+ ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
+ ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
+ ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
+ ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ ElfHdr.emitWord16(TEW->getSHdrSize());
+
+ // e_shnum = # of section header ents
+ ELFHdr_e_shnum_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // e_shstrndx = Section # of '.shstrtab'
+ ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // Add the null section, which is required to be first in the file.
+ getNullSection();
+
+ // The first entry in the symtab is the null symbol and the second
+ // is a local symbol containing the module/file name
+ SymbolList.push_back(new ELFSym());
+ SymbolList.push_back(ELFSym::getFileSym());
+
+ return false;
+}
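+
+ // Sketch of the e_ident bytes emitted above for a little-endian 64-bit
+ // target (illustrative; the actual values come from the TargetELFWriterInfo):
+ //   0x7f 'E' 'L' 'F'    EI_MAG0..EI_MAG3
+ //   0x02                EI_CLASS   == ELFCLASS64
+ //   0x01                EI_DATA    == ELFDATA2LSB
+ //   0x01                EI_VERSION == EV_CURRENT
+ //   0x00 ... 0x00       padding up to the 16-byte e_ident
+ // followed by e_type == ET_REL (1), since this writer only produces
+ // relocatable object files.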
+
+// AddPendingGlobalSymbol - Add a global to be processed and to
+ // the global symbol lookup; use a zero index because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup /* = false */) {
+ PendingGlobals.insert(GV);
+ if (AddToLookup)
+ GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed
+ // and to the external symbol lookup; use a zero index because
+// the symbol table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) {
+ PendingExternals.insert(External);
+ ExtSymLookup[External] = 0;
+}
+
+ELFSection &ELFWriter::getDataSection() {
+ const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
+ return getSection(Data->getSectionName(), Data->getType(),
+ Data->getFlags(), 4);
+}
+
+ELFSection &ELFWriter::getBSSSection() {
+ const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
+ return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
+}
+
+// getCtorSection - Get the static constructor section
+ELFSection &ELFWriter::getCtorSection() {
+ const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
+ return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
+}
+
+// getDtorSection - Get the static destructor section
+ELFSection &ELFWriter::getDtorSection() {
+ const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
+ return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
+}
+
+// getTextSection - Get the text section for the specified function
+ELFSection &ELFWriter::getTextSection(const Function *F) {
+ const MCSectionELF *Text =
+ (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
+ return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
+}
+
+// getJumpTableSection - Get a read only section for constants when
+// emitting jump tables. TODO: add PIC support
+ELFSection &ELFWriter::getJumpTableSection() {
+ const MCSectionELF *JT =
+ (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
+ return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
+ TM.getTargetData()->getPointerABIAlignment());
+}
+
+// getConstantPoolSection - Get a constant pool section based on the machine
+// constant pool entry type and relocation info.
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSectionELF *CPSect =
+ (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
+ return getSection(CPSect->getSectionName(), CPSect->getType(),
+ CPSect->getFlags(), CPE.getAlignment());
+}
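+
+ // For example (the concrete section names depend on the target's
+ // TargetLoweringObjectFile): a 4-byte float constant with no relocations
+ // (case 0, alloc size 4) maps to SectionKind::getMergeableConst4() and
+ // typically ends up in ".rodata.cst4", while an entry whose value needs a
+ // dynamic relocation (case 2) is placed in a read-only-with-relocations
+ // section such as ".data.rel.ro".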
+
+ // getRelocSection - Return the relocation section of section 'S'. The
+ // returned section holds entries with addends if the target uses RELA
+ // relocations.
+ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
+ unsigned SectionType = TEW->hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL;
+
+ std::string SectionName(".rel");
+ if (TEW->hasRelocationAddend())
+ SectionName.append("a");
+ SectionName.append(S.getName());
+
+ return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default:
+ llvm_unreachable("unknown visibility type");
+ case GlobalValue::DefaultVisibility:
+ return ELF::STV_DEFAULT;
+ case GlobalValue::HiddenVisibility:
+ return ELF::STV_HIDDEN;
+ case GlobalValue::ProtectedVisibility:
+ return ELF::STV_PROTECTED;
+ }
+ return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+ if (GV->hasInternalLinkage())
+ return ELF::STB_LOCAL;
+
+ if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+ return ELF::STB_WEAK;
+
+ return ELF::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+ if (GV->isDeclaration())
+ return ELF::STT_NOTYPE;
+
+ if (isa<Function>(GV))
+ return ELF::STT_FUNC;
+
+ return ELF::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+ // no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+ return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is
+ // private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+ assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+ const GlobalValue *GV = GblSym->getGlobalValue();
+ if (GV->hasPrivateLinkage()) {
+ // For private symbols, keep track of the index inside the
+ // private list since they will never go to the symbol table
+ // and won't be patched up later.
+ PrivateSyms.push_back(GblSym);
+ GblSymLookup[GV] = PrivateSyms.size()-1;
+ } else {
+ // Non-private symbols are left with zero indices until they
+ // are patched up during symbol table emission (where the
+ // indices are assigned).
+ SymbolList.push_back(GblSym);
+ GblSymLookup[GV] = 0;
+ }
+}
+
+ /// HasCommonSymbols - True if this section holds common symbols; this is
+ /// indicated in the ELF object file by a symbol with the SHN_COMMON section
+ /// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+ // FIXME: this is wrong, a common symbol can be in .data for example.
+ if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+ return true;
+
+ return false;
+}
+
+
+ // EmitGlobal - Choose the right section for the global and emit it
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+ // Check if the referenced symbol is already emitted
+ if (GblSymLookup.find(GV) != GblSymLookup.end())
+ return;
+
+ // Handle ELF Bind, Visibility and Type for the current symbol
+ unsigned SymBind = getGlobalELFBinding(GV);
+ unsigned SymType = getGlobalELFType(GV);
+ bool IsUndefSym = IsELFUndefSym(GV);
+
+ ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+ : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+ if (!IsUndefSym) {
+ assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ // Handle special llvm globals
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ // Get the ELF section where this global belongs from TLOF
+ const MCSectionELF *S =
+ (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+ ELFSection &ES =
+ getSection(S->getSectionName(), S->getType(), S->getFlags());
+ SectionKind Kind = S->getKind();
+
+ // The symbol alignment should update the section alignment if needed
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPreferredAlignment(GVar);
+ unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+ GblSym->Size = Size;
+
+ if (HasCommonSymbols(*S)) { // Symbol must go to a common section
+ GblSym->SectionIdx = ELF::SHN_COMMON;
+
+ // A new linkonce section is created for each global in the
+ // common section; the default alignment is 1 and the symbol
+ // value contains its alignment.
+ ES.Align = 1;
+ GblSym->Value = Align;
+
+ } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // Round the size up to the alignment so that the next object can
+ // start at the right offset in the section.
+ if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+ ES.Align = std::max(ES.Align, Align);
+
+ // GblSym->Value should contain the virtual offset inside the section.
+ // Virtual because the BSS space is not allocated on ELF objects
+ GblSym->Value = ES.Size;
+ ES.Size += Size;
+
+ } else { // The symbol must go to some kind of data section
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // GblSym->Value should contain the symbol offset inside the section,
+ // and all symbols should start on their required alignment boundary
+ ES.Align = std::max(ES.Align, Align);
+ ES.emitAlignment(Align);
+ GblSym->Value = ES.size();
+
+ // Emit the global to the data section 'ES'
+ EmitGlobalConstant(GVar->getInitializer(), ES);
+ }
+ }
+
+ AddToSymbolList(GblSym);
+}
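+
+ // Worked example of the BSS path above (illustrative numbers): with
+ // ES.Size == 5 and a global requiring Align == 4 and Size == 8, the rounding
+ //   ES.Size = (5 + 4 - 1) & ~(4 - 1) == 8
+ // places the symbol at Value == 8 and leaves ES.Size == 16 afterwards; no
+ // bytes are emitted because BSS space is not allocated in the object file.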
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS) {
+
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, GblS);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ GblS.emitZeros(padSize);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
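+
+ // Worked example (assumes the usual 8-byte alignment for i64): for a
+ // { i32, i64 } struct the layout is offset 0 for the i32, offset 8 for the
+ // i64, total size 16. The first iteration computes
+ //   padSize = (8 - 0) - 4 == 4
+ // so four zero bytes follow the i32; the last iteration computes
+ //   padSize = (16 - 8) - 8 == 0.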
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), GblS);
+ return;
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ GblS.emitZeros(Size);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, GblS);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ APInt Val = CFP->getValueAPF().bitcastToAPInt();
+ if (CFP->getType()->isDoubleTy())
+ GblS.emitWord64(Val.getZExtValue());
+ else if (CFP->getType()->isFloatTy())
+ GblS.emitWord32(Val.getZExtValue());
+ else if (CFP->getType()->isX86_FP80Ty()) {
+ unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+ TD->getTypeStoreSize(CFP->getType());
+ GblS.emitWordFP80(Val.getRawData(), PadSize);
+ } else if (CFP->getType()->isPPC_FP128Ty())
+ llvm_unreachable("PPC_FP128Ty global emission not implemented");
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (Size == 1)
+ GblS.emitByte(CI->getZExtValue());
+ else if (Size == 2)
+ GblS.emitWord16(CI->getZExtValue());
+ else if (Size == 4)
+ GblS.emitWord32(CI->getZExtValue());
+ else
+ EmitGlobalConstantLargeInt(CI, GblS);
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ const VectorType *PTy = CP->getType();
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I), GblS);
+ return;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Resolve the constant expression into a (Constant, Offset) pair.
+ // If 'Res.first' is a GlobalValue, emit a relocation with the
+ // offset 'Res.second'; otherwise emit a global constant as is
+ // always done for non-constant-expression types.
+ CstExprResTy Res = ResolveConstantExpr(CE);
+ const Constant *Op = Res.first;
+
+ if (isa<GlobalValue>(Op))
+ EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+ TD->getTypeAllocSize(Op->getType()),
+ GblS, Res.second);
+ else
+ EmitGlobalConstant(Op, GblS);
+
+ return;
+ } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+ // Fill the data entry with zeros or emit a relocation entry
+ if (isa<ConstantPointerNull>(CV))
+ GblS.emitZeros(Size);
+ else
+ EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+ Size, GblS);
+ return;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function and
+ // therefore must be referenced using a relocation entry.
+ EmitGlobalDataRelocation(GV, Size, GblS);
+ return;
+ }
+
+ std::string msg;
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << "Constant unimp for type: " << *CV->getType();
+ report_fatal_error(ErrorMsg.str());
+}
+
+ // ResolveConstantExpr - Resolve the constant expression until it stops
+ // yielding other constant expressions.
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+ // There is no constant expression nested inside anymore.
+ if (!isa<ConstantExpr>(CV))
+ return std::make_pair(CV, 0);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ return ResolveConstantExpr(CE->getOperand(0));
+
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size());
+ return std::make_pair(ptrVal, Offset);
+ }
+ case Instruction::IntToPtr: {
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return ResolveConstantExpr(Op);
+ }
+ case Instruction::PtrToInt: {
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot greater than or equal to the size of the pointer.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+ return ResolveConstantExpr(Op);
+
+ llvm_unreachable("Integer size less then pointer size");
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // Only handle cases where there's a constant expression with GlobalValue
+ // as first operand and ConstantInt as second, which are the cases we can
+ // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+ // 1) Instruction::Add => (global) + CstInt
+ // 2) Instruction::Sub => (global) + -CstInt
+ const Constant *Op0 = CE->getOperand(0);
+ const Constant *Op1 = CE->getOperand(1);
+ assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+ CstExprResTy Res = ResolveConstantExpr(Op0);
+ assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+ const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ return std::make_pair(Res.first, RHS.getSExtValue());
+ case Instruction::Sub:
+ return std::make_pair(Res.first, (-RHS).getSExtValue());
+ }
+ }
+ }
+
+ report_fatal_error(CE->getOpcodeName() +
+ StringRef(": Unsupported ConstantExpr type"));
+
+ return std::make_pair(CV, 0); // silence warning
+}
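+
+ // Illustrative example (hypothetical IR): for an initializer such as
+ //   i8* getelementptr ([16 x i8]* @buf, i32 0, i32 12)
+ // the GetElementPtr case folds the indices with TargetData::getIndexedOffset
+ // and returns the pair (@buf, 12); EmitGlobalConstant then turns this into a
+ // data relocation against @buf with addend 12.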
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset) {
+ // Create the relocation entry for the global value
+ MachineRelocation MR =
+ MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+ TEW->getAbsoluteLabelMachineRelTy(),
+ const_cast<GlobalValue*>(GV),
+ Offset);
+
+ // Fill the data entry with zeros
+ GblS.emitZeros(Size);
+
+ // Add the relocation entry for the current data section
+ GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ ELFSection &S) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ const uint64_t *RawData = CI->getValue().getRawData();
+ uint64_t Val = 0;
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+ S.emitWord64(Val);
+ }
+}
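+
+ // Example (illustrative): an i128 constant is held by APInt as two 64-bit
+ // words with the least significant word first. On a little-endian target the
+ // loop above emits RawData[0] then RawData[1]; on a big-endian target it
+ // emits RawData[1] then RawData[0], so the bytes land in the file in target
+ // order.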
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used")
+ llvm_unreachable("not implemented yet");
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPointerPrefAlignment();
+ if (GV->getName() == "llvm.global_ctors") {
+ ELFSection &Ctor = getCtorSection();
+ Ctor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Ctor);
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ ELFSection &Dtor = getDtorSection();
+ Dtor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Dtor);
+ return true;
+ }
+
+ return false;
+}
+
+ /// EmitXXStructorList - Emit the ctor or dtor list. This just emits the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
+ // Should be an array of '{ i32, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (List->isNullValue()) return;
+ const ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (InitList->getOperand(i)->isNullValue())
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+ if (CS->getOperand(1)->isNullValue())
+ continue;
+
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1), Xtor);
+ }
+}
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the ElfCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Emit .data section placeholder
+ getDataSection();
+
+ // Emit .bss section placeholder
+ getBSSSection();
+
+ // Build and emit data, bss and "common" sections.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit all pending globals
+ for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+ I != E; ++I)
+ EmitGlobal(*I);
+
+ // Emit all pending externals
+ for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+ I != E; ++I)
+ SymbolList.push_back(ELFSym::getExtSym(*I));
+
+ // Emit a symbol for each section created until now; skip the null section
+ for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ ELFSym *SectionSym = ELFSym::getSectionSym();
+ SectionSym->SectionIdx = ES.SectionIdx;
+ SymbolList.push_back(SectionSym);
+ ES.Sym = SymbolList.back();
+ }
+
+ // Emit string table
+ EmitStringTable(M.getModuleIdentifier());
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // Emit the relocation sections.
+ EmitRelocations();
+
+ // Emit the sections string table.
+ EmitSectionTableStringTable();
+
+ // Dump the sections and section table to the .o file.
+ OutputSectionsAndSectionTable();
+
+ return false;
+}
+
+ // RelocateField - Patch the relocatable field at 'Offset' in 'BO'
+ // using a 'Value' of known 'Size'.
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+ int64_t Value, unsigned Size) {
+ if (Size == 32)
+ BO.fixWord32(Value, Offset);
+ else if (Size == 64)
+ BO.fixWord64(Value, Offset);
+ else
+ llvm_unreachable("don't know howto patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+ // True if the target uses the relocation entry to hold the addend,
+ // otherwise the addend is written directly to the relocatable field.
+ bool HasRelA = TEW->hasRelocationAddend();
+
+ // Create Relocation sections for each section which needs it.
+ for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+ ELFSection &S = *SectionList[i];
+
+ // This section does not have relocations
+ if (!S.hasRelocations()) continue;
+ ELFSection &RelSec = getRelocSection(S);
+
+ // 'Link' - Section hdr idx of the associated symbol table
+ // 'Info' - Section hdr idx of the section to which the relocation applies
+ ELFSection &SymTab = getSymbolTableSection();
+ RelSec.Link = SymTab.SectionIdx;
+ RelSec.Info = S.SectionIdx;
+ RelSec.EntSize = TEW->getRelocationEntrySize();
+
+ // Get the relocations from Section
+ std::vector<MachineRelocation> Relos = S.getRelocations();
+ for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+ MRE = Relos.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+
+ // Relocatable field offset from the section start
+ unsigned RelOffset = MR.getMachineCodeOffset();
+
+ // Symbol index in the symbol table
+ unsigned SymIdx = 0;
+
+ // Target specific relocation field type and size
+ unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+ unsigned RelTySize = TEW->getRelocationTySize(RelType);
+ int64_t Addend = 0;
+
+ // There are several machine relocation types, and each one of
+ // them needs a different approach to retrieve the symbol table index.
+ if (MR.isGlobalValue()) {
+ const GlobalValue *G = MR.getGlobalValue();
+ int64_t GlobalOffset = MR.getConstantVal();
+ SymIdx = GblSymLookup[G];
+ if (G->hasPrivateLinkage()) {
+ // If the target uses a section offset in the relocation:
+ // SymIdx + Addend = section sym for global + section offset
+ unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+ Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+ } else {
+ Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+ }
+ } else if (MR.isExternalSymbol()) {
+ const char *ExtSym = MR.getExternalSymbol();
+ SymIdx = ExtSymLookup[ExtSym];
+ Addend = TEW->getDefaultAddendForRelTy(RelType);
+ } else {
+ // Get the symbol index for the section symbol
+ unsigned SectionIdx = MR.getConstantVal();
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+ // The symbol offset inside the section
+ int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+ // For PC-relative relocations where symbols are defined in the same
+ // section in which they are referenced, ignore the relocation entry and
+ // patch the relocatable field with the symbol offset directly.
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+ int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+ RelocateField(S, RelOffset, Value, RelTySize);
+ continue;
+ }
+
+ Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
+ }
+
+ // On targets that do not store the addend in the relocation entry,
+ // the relocatable field itself must be patched to contain the addend;
+ // otherwise write zeros to make sure there is no garbage there.
+ RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
+ // Get the relocation entry and emit to the relocation section
+ ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
+ EmitRelocation(RelSec, Rel, HasRelA);
+ }
+ }
+}
+
+ /// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
+void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
+ bool HasRelA) {
+ RelSec.emitWord(Rel.getOffset());
+ RelSec.emitWord(Rel.getInfo(is64Bit));
+ if (HasRelA)
+ RelSec.emitWord(Rel.getAddend());
+}
+
+/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
+ if (is64Bit) {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ SymbolTable.emitWord64(Sym.Value);
+ SymbolTable.emitWord64(Sym.Size);
+ } else {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitWord32(Sym.Value);
+ SymbolTable.emitWord32(Sym.Size);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ }
+}
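+
+ // The two field orders above mirror the on-disk layouts from the ELF spec:
+ //   Elf64_Sym: st_name(4) st_info(1) st_other(1) st_shndx(2) st_value(8) st_size(8)
+ //   Elf32_Sym: st_name(4) st_value(4) st_size(4) st_info(1) st_other(1) st_shndx(2)
+ // (sizes in bytes), which is why the 32-bit and 64-bit cases emit the same
+ // data in a different order.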
+
+ /// EmitSectionHeader - Write the header of section 'SHdr' into the
+ /// 'SHdrTab' Section Header Table
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+ const ELFSection &SHdr) {
+ SHdrTab.emitWord32(SHdr.NameIdx);
+ SHdrTab.emitWord32(SHdr.Type);
+ if (is64Bit) {
+ SHdrTab.emitWord64(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord64(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord64(SHdr.Align);
+ SHdrTab.emitWord64(SHdr.EntSize);
+ } else {
+ SHdrTab.emitWord32(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord32(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord32(SHdr.Align);
+ SHdrTab.emitWord32(SHdr.EntSize);
+ }
+}
+
+/// EmitStringTable - If the current symbol table is non-empty, emit the string
+/// table for it
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
+ if (!SymbolList.size()) return; // Empty symbol table.
+ ELFSection &StrTab = getStringTableSection();
+
+ // Set the zeroth entry of the string table to a null byte, as required.
+ StrTab.emitByte(0);
+
+ // Walk the symbol list and write the symbol names into the string table.
+ unsigned Index = 1;
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ ELFSym &Sym = *(*I);
+
+ std::string Name;
+ if (Sym.isGlobalValue()) {
+ SmallString<40> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
+ Name.append(NameStr.begin(), NameStr.end());
+ } else if (Sym.isExternalSym())
+ Name.append(Sym.getExternalSymbol());
+ else if (Sym.isFileType())
+ Name.append(ModuleName);
+
+ if (Name.empty()) {
+ Sym.NameIdx = 0;
+ } else {
+ Sym.NameIdx = Index;
+ StrTab.emitString(Name);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTab.size());
+ StrTab.Size = Index;
+}
+
+ // SortSymbols - In the symbol table, local symbols must come before
+ // all other symbols with non-local bindings. The return value is
+ // the position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() {
+ unsigned FirstNonLocalSymbol;
+ std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ if ((*I)->isLocalBind())
+ LocalSyms.push_back(*I);
+ else
+ OtherSyms.push_back(*I);
+ }
+ SymbolList.clear();
+ FirstNonLocalSymbol = LocalSyms.size();
+
+ for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+ SymbolList.push_back(LocalSyms[i]);
+
+ for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+ SymbolList.push_back(*I);
+
+ LocalSyms.clear();
+ OtherSyms.clear();
+
+ return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (!SymbolList.size()) return; // Empty symbol table.
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSymbolTableSection();
+ SymTab.Align = TEW->getPrefELFAlignment();
+
+ // Section Index of .strtab.
+ SymTab.Link = getStringTableSection().SectionIdx;
+
+ // Size of each symtab entry.
+ SymTab.EntSize = TEW->getSymTabEntrySize();
+
+ // Reorder the symbol table with local symbols first!
+ unsigned FirstNonLocalSymbol = SortSymbols();
+
+ // Emit all the symbols to the symbol table.
+ for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+ ELFSym &Sym = *SymbolList[i];
+
+ // Emit symbol to the symbol table
+ EmitSymbol(SymTab, Sym);
+
+ // Record the symbol table index for each symbol
+ if (Sym.isGlobalValue())
+ GblSymLookup[Sym.getGlobalValue()] = i;
+ else if (Sym.isExternalSym())
+ ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+ // Keep track of the symbol's index in the symbol table
+ Sym.SymTabIdx = i;
+ }
+
+ // One greater than the symbol table index of the last local symbol
+ SymTab.Info = FirstNonLocalSymbol;
+ SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ S.NameIdx = Index;
+ SHStrTab.emitString(S.getName());
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += S.getName().size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == SHStrTab.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = ElfHdr.size(); // File header first.
+
+ // Adjust the alignment of all sections if needed; skip the null section.
+ for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ if (!ES.size()) {
+ ES.Offset = FileOff;
+ continue;
+ }
+
+ // Update Section size
+ if (!ES.Size)
+ ES.Size = ES.size();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (ES.Align)
+ FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
+
+ ES.Offset = FileOff;
+ FileOff += ES.Size;
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = TEW->getPrefELFAlignment();
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+ FileOff = ElfHdr.size();
+
+ // Section Header Table blob
+ BinaryObject SHdrTable(isLittleEndian, is64Bit);
+
+ // Emit all of the sections to the file and build the section header table.
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ << ", Size: " << S.Size << ", Offset: " << S.Offset
+ << ", SectionData Size: " << S.size() << "\n");
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.size()) {
+ if (S.Align) {
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+ }
+ O.write((char *)&S.getData()[0], S.Size);
+ FileOff += S.Size;
+ }
+
+ EmitSectionHeader(SHdrTable, S);
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.h b/contrib/llvm/lib/CodeGen/ELFWriter.h
new file mode 100644
index 000000000000..6f7fbace8aba
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,251 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+ class BinaryObject;
+ class Constant;
+ class ConstantInt;
+ class ConstantStruct;
+ class ELFCodeEmitter;
+ class ELFRelocation;
+ class ELFSection;
+ struct ELFSym;
+ class GlobalVariable;
+ class JITDebugRegisterer;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachineConstantPoolEntry;
+ class ObjectCodeEmitter;
+ class MCAsmInfo;
+ class TargetELFWriterInfo;
+ class TargetLoweringObjectFile;
+ class raw_ostream;
+ class SectionKind;
+ class MCContext;
+ class TargetMachine;
+
+ typedef std::vector<ELFSym*>::iterator ELFSymIter;
+ typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+ typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+ typedef SetVector<const char *>::const_iterator PendingExtsIter;
+ typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ friend class JITDebugRegisterer;
+ public:
+ static char ID;
+
+ /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ raw_ostream &O;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// Context object for machine code objects.
+ MCContext &OutContext;
+
+ /// Target Elf Writer description.
+ const TargetELFWriterInfo *TEW;
+
+ /// Mang - The object used to perform name mangling for this module.
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *ElfCE;
+
+ /// TLOF - Target Lowering Object File, provide section names for globals
+ /// and other object file specific stuff
+ const TargetLoweringObjectFile &TLOF;
+
+ /// MAI - Target Asm Info, provide information about section names for
+ /// globals and other target specific stuff.
+ const MCAsmInfo *MAI;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //===------------------------------------------------------------------===//
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ /// Blob containing the Elf header
+ BinaryObject ElfHdr;
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::vector<ELFSection*> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to the corresponding
+ /// ELFSection in SectionList. Used to quickly find a section (and hence its
+ /// index) from the names provided by MAI.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// PendingGlobals - Globals not processed as symbols yet.
+ SetVector<const GlobalValue*> PendingGlobals;
+
+ /// GblSymLookup - This is a mapping from global value to a symbol index
+ /// in the symbol table or private symbols list. This is useful since reloc
+ /// symbol references must be quickly mapped to their indices on the lists.
+ std::map<const GlobalValue*, uint32_t> GblSymLookup;
+
+ /// PendingExternals - Externals not processed as symbols yet.
+ SetVector<const char *> PendingExternals;
+
+ /// ExtSymLookup - This is a mapping from externals to a symbol index
+ /// in the symbol table list. This is useful since reloc symbol references
+ /// must be quickly mapped to their symbol table indices.
+ std::map<const char *, uint32_t> ExtSymLookup;
+
+ /// SymbolList - This is the list of symbols emitted to the symbol table.
+ /// When the SymbolList is finally built, local symbols must be placed in
+ /// the beginning while non-locals at the end.
+ std::vector<ELFSym*> SymbolList;
+
+ /// PrivateSyms - Record private symbols; a symbol recorded here must never
+ /// appear in the SymbolList.
+ std::vector<ELFSym*> PrivateSyms;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name, unsigned Type,
+ unsigned Flags = 0, unsigned Align = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+ SN = SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ SN->Link = ELF::SHN_UNDEF;
+ SN->Align = Align;
+ return *SN;
+ }
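+
+ // Repeated calls with the same name return the same ELFSection; only the
+ // first call assigns a SectionIdx. A (hypothetical) caller could write, e.g.,
+ // getSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC, 4) to lazily
+ // create a 4-byte aligned read-only data section.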
+
+ ELFSection &getNonExecStackSection() {
+ return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
+ }
+
+ ELFSection &getSymbolTableSection() {
+ return getSection(".symtab", ELF::SHT_SYMTAB, 0);
+ }
+
+ ELFSection &getStringTableSection() {
+ return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getSectionHeaderStringTableSection() {
+ return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getNullSection() {
+ return getSection("", ELF::SHT_NULL, 0);
+ }
+
+ ELFSection &getDataSection();
+ ELFSection &getBSSSection();
+ ELFSection &getCtorSection();
+ ELFSection &getDtorSection();
+ ELFSection &getJumpTableSection();
+ ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+ ELFSection &getTextSection(const Function *F);
+ ELFSection &getRelocSection(ELFSection &S);
+
+ // Helpers for obtaining ELF specific info.
+ unsigned getGlobalELFBinding(const GlobalValue *GV);
+ unsigned getGlobalELFType(const GlobalValue *GV);
+ unsigned getGlobalELFVisibility(const GlobalValue *GV);
+
+ // AddPendingGlobalSymbol - Add a global to be processed and to
+ // the global symbol lookup, use a zero index because the table
+ // index will be determined later.
+ void AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup = false);
+
+ // AddPendingExternalSymbol - Add the external to be processed
+ // and to the external symbol lookup, use a zero index because
+ // the symbol table index will be determined later.
+ void AddPendingExternalSymbol(const char *External);
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is private,
+ // add it to the PrivateSyms list; otherwise add it to SymbolList.
+ void AddToSymbolList(ELFSym *GblSym);
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offset in ELFHeader of these various pieces to update and other
+ // locations in the file.
+ unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
+
+ private:
+ void EmitGlobal(const GlobalValue *GV);
+ void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+ void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS);
+ void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+ void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset = 0);
+ bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
+ void EmitRelocations();
+ void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+ void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+ void EmitSectionTableStringTable();
+ void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+ void EmitSymbolTable();
+ void EmitStringTable(const std::string &ModuleName);
+ void OutputSectionsAndSectionTable();
+ void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+ unsigned Size);
+ unsigned SortSymbols();
+ CstExprResTy ResolveConstantExpr(const Constant *CV);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..a7aba89b87f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
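+
+// Numbering sketch: block N owns two equivalence-class nodes, 2*N for its
+// ingoing bundle and 2*N+1 for its outgoing bundle. For example, if BB#0 has
+// successors BB#1 and BB#2, the joins above merge node 1 (out of BB#0) with
+// nodes 2 (into BB#1) and 4 (into BB#2), so both outgoing edges of BB#0 end
+// up in the same bundle.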
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const std::string &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..a67140ece4a5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Expand ISel Pseudo-instructions";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+ return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ const MCInstrDesc &MCID = MI->getDesc();
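+ // usesCustomInsertionHook() is set for instructions whose TableGen
+ // definition marks usesCustomInserter = 1; their expansion may create new
+ // basic blocks, which is why the block and iterators are re-seeded below.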
+ if (MCID.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..d757cf409d50
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,213 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI); RI != RE;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..f80e9ced0bc9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 000000000000..766c6ee542a9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,416 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable(0);
+ return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
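+
+// In IR terms the rewrite amounts to the following (names hypothetical): for
+// an entry block containing
+//   %root = alloca i8*
+//   call void @llvm.gcroot(i8** %root, i8* null)
+// with no store to %root before the first potential safe point, a
+//   store i8* null, i8** %root
+// is inserted right after the alloca, so the collector never scans an
+// uninitialized root slot.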
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action, or if the GC roots need to be initialized by the default lowering.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
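+
+// In IR terms the default lowering above is roughly (operand names
+// hypothetical):
+//   %v = call i8* @llvm.gcread(i8* %obj, i8** %slot)     -->  %v = load i8** %slot
+//   call void @llvm.gcwrite(i8* %v, i8* %obj, i8** %slot) --> store i8* %v, i8** %slot
+// while llvm.gcroot calls are kept and merely recorded for root
+// initialization.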
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
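+
+// The emitted machine code around each call is bracketed roughly as follows
+// (labels hypothetical, each emitted only if the strategy asked for that
+// safe point kind):
+//   GC_LABEL <pre>    ; GC::PreCall
+//   CALL ...
+//   GC_LABEL <post>   ; GC::PostCall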
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 000000000000..6cb22778caf9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1513 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
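+
+ // Shape summary: "simple" predicates a single side block that never rejoins
+ // the other path; "triangle" predicates TrueBB, which then flows into FalseBB
+ // where the two paths rejoin; "diamond" predicates both TrueBB and FalseBB,
+ // which rejoin at a common tail block. The *False/*Rev variants apply the
+ // same shapes to the false path and/or with the branch condition reversed.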
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result of
+ /// if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false, i.e. the block's
+ /// terminators could be analyzed.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB - See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineLoopInfo *MLI;
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks if 'true' block's false branch
+/// branches to the 'false' block rather than the other way around. It also
+/// returns, in 'Dups', the number of instructions that the ifcvt would need
+/// to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->getDesc().isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->getDesc().isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
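+
+// For illustration (hypothetical pseudo-MIR, not tied to any target): if both
+// halves of the diamond begin with the same "r0 = LDR ..." and end with the
+// same "STR r1, ...", Dups1 and Dups2 each become 1. IfConvertDiamond then
+// emits the shared prefix and suffix only once, and only the differing middle
+// instructions of each half are predicated.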
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless the clobbering instruction is
+/// the last one in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
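+
+// For illustration (hypothetical pseudo-code, not tied to any target):
+//   r0 = ADD r1, r2
+//   CMP r0, #0          ; defines the predicate (sets ClobbersPred)
+//   r3 = SUB r4, r5     ; unpredicated instruction after the clobber
+// marks the block IsUnpredicable, because SUB could no longer be predicated
+// correctly once CMP has rewritten the condition. If CMP were the last
+// non-branch instruction, the block would remain a candidate.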
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, or ends with a fallthrough or unconditional branch, or is
+ // no longer considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+
+ // Try to predict the branch, using loop info to guide us.
+ // General heuristics are:
+ // - backedge -> 90% taken
+ // - early exit -> 20% taken
+ // - branch predictor confidence -> 90%
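+ // For example, BranchProbability(9, 10) below encodes "taken 9 times out
+ // of 10"; the default Prediction(5, 10) is an even 50/50 guess.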
+ BranchProbability Prediction(5, 10);
+ MachineLoop *Loop = MLI->getLoopFor(BB);
+ if (Loop) {
+ if (TrueBBI.BB == Loop->getHeader())
+ Prediction = BranchProbability(9, 10);
+ else if (FalseBBI.BB == Loop->getHeader())
+ Prediction = BranchProbability(1, 10);
+
+ MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
+ MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
+ if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
+ Prediction = BranchProbability(2, 10);
+ else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
+ Prediction = BranchProbability(8, 10);
+ }
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
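+
+// For illustration: BB can "fall through" to ToBB across a chain of empty
+// blocks, e.g. BB -> (empty) -> (empty) -> ToBB, provided each block in the
+// chain is an actual CFG successor of the one before it.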
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
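+///
+/// For illustration (hypothetical, target-independent): once predicated,
+///   r0 = ADDeq r1, r2
+/// only writes r0 when the predicate holds, so r0's previous value may
+/// survive. The def must therefore also be treated as a read of r0, which is
+/// the implicit use UpdatePredRedefs adds when AddImpUse is set.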
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB);
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (I->isDebugValue() || TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the def is modeled as read + write.
+ UpdatePredRedefs(I, Redefs, TRI, true);
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const MCInstrDesc &MCID = I->getDesc();
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && MCID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the def is modeled as read + write.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (AddEdges)
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..5547f735ba5e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1058 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "Spiller.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of snippets included in spills");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads");
+STATISTIC(NumHoistLocal, "Number of locally hoisted spills");
+STATISTIC(NumHoistGlobal, "Number of globally hoisted spills");
+STATISTIC(NumRedundantSpills, "Number of redundant spills identified");
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunctionPass &Pass;
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {}
+ };
+
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : Pass(pass),
+ MF(mf),
+ LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()),
+ VRM(vrm),
+ MFI(*mf.getFrameInfo()),
+ MRI(mf.getRegInfo()),
+ TII(*mf.getTarget().getInstrInfo()),
+ TRI(*mf.getTarget().getRegisterInfo()) {}
+
+ void spill(LiveRangeEdit &);
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI = 0);
+ void insertReload(LiveInterval &NewLI, SlotIndex,
+ MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance
+// for spill slots, which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+ if (!MI->isFullCopy())
+ return 0;
+ if (MI->getOperand(0).getReg() == Reg)
+ return MI->getOperand(1).getReg();
+ if (MI->getOperand(1).getReg() == Reg)
+ return MI->getOperand(0).getReg();
+ return 0;
+}
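+
+// For illustration: "%vreg5 = COPY %vreg7" is a full copy of either register,
+// so isFullCopyOf returns the other operand; a copy that only reads or writes
+// a sub-register (e.g. "%vreg5:sub_lo = COPY ...") is not a full copy and
+// yields 0. (Register names here are hypothetical.)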
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = 0;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ RI = MRI.reg_nodbg_begin(SnipLI.reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && MI != UseMI)
+ return false;
+ UseMI = MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
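+//
+// For illustration (hypothetical virtual registers): after splitting,
+//   %A = ...             ; original def
+//   %B = COPY %A         ; sibling copy
+//   spill %B
+// tracing %B's value back through the copy to %A lets the spiller either
+// rematerialize the original def or notice that the value is already on the
+// stack, instead of blindly storing %B.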
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+ SmallPtrSet<VNInfo*, 8> Visited;
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ // Best spill candidate seen so far. This must dominate UseVNI.
+ SibValueInfo SVI(UseReg, UseVNI);
+ MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
+ MachineBasicBlock *SpillMBB = UseMBB;
+ unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
+ bool SeenOrigPHI = false; // Original PHI met.
+
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ tie(Reg, VNI) = WorkList.pop_back_val();
+ if (!Visited.insert(VNI))
+ continue;
+
+ // Is this value a better spill candidate?
+ if (!isRegToSpill(Reg)) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ if (MBB == SpillMBB) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ if (VNI->def < SVI.SpillVNI->def) {
+ DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": "
+ << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << '\n');
+ SVI.SpillReg = Reg;
+ SVI.SpillVNI = VNI;
+ }
+ } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+ // This is a valid spill location dominating UseVNI.
+ // Prefer to spill at a smaller loop depth.
+ unsigned Depth = Loops.getLoopDepth(MBB);
+ if (Depth < SpillDepth) {
+ DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg)
+ << ':' << VNI->id << '@' << VNI->def << '\n');
+ SVI.SpillReg = Reg;
+ SVI.SpillVNI = VNI;
+ SpillMBB = MBB;
+ SpillDepth = Depth;
+ }
+ }
+ }
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ if (VNI->def == OrigVNI->def) {
+ DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':'
+ << VNI->id << '@' << VNI->def << '\n');
+ SeenOrigPHI = true;
+ continue;
+ }
+ // Get values live-out of predecessors.
+ LiveInterval &LI = LIS.getInterval(Reg);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ if (PVNI)
+ WorkList.push_back(std::make_pair(Reg, PVNI));
+ }
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex());
+ assert(SrcVNI && "Copy from non-existing value");
+ DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def << '\n');
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':'
+ << VNI->id << "@" << VNI->def << '\n');
+ SVI.AllDefsAreReloads = true;
+ continue;
+ }
+
+ // We have an 'original' def. Don't record trivial cases.
+ if (VNI == UseVNI) {
+ DEBUG(dbgs() << "Not a sibling copy.\n");
+ return MI;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << " def " << PrintReg(Reg) << ':'
+ << VNI->id << '@' << VNI->def << '\t' << *MI);
+ SVI.DefMI = MI;
+ } while (!WorkList.empty());
+
+ if (SeenOrigPHI || SVI.DefMI)
+ SVI.AllDefsAreReloads = false;
+
+ DEBUG({
+ if (SVI.AllDefsAreReloads)
+ dbgs() << "All defs are reloads.\n";
+ else
+ dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n';
+ });
+ SibValues.insert(std::make_pair(UseVNI, SVI));
+ return SVI.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = 0;
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || VNI->getCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (!DefMI && !VNI->isPHIDef())
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
+ assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
+
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SVI.SpillVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(MII);
+ VRM.addSpillSlotUse(StackSlot, MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+
+ if (MBB == CopyMI->getParent())
+ ++NumHoistLocal;
+ else
+ ++NumHoistGlobal;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = UI.skipInstruction();) {
+ if (!MI->isCopy() && !MI->getDesc().mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(MI);
+ ++NumRedundantSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI))
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx);
+
+ if (!ParentVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(MI))
+ return false;
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
+ if (Writes) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+ }
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Allocate a new register for the remat.
+ LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
+ NewLI.markNotSpillable();
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ LIS, TII, TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(LIS, TII, AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+
+ // Get rid of deleted and empty intervals.
+ for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+ unsigned Reg = RegsToSpill[i-1];
+ if (!LIS.hasInterval(Reg)) {
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (!LI.empty())
+ continue;
+ Edit->eraseVirtReg(Reg, LIS);
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// coalesceStackAccess - If MI is a load or store of StackSlot, it can be
+/// removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+ int FI = 0;
+ unsigned InstrReg;
+ if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) &&
+ !(InstrReg = TII.isStoreToStackSlot(MI, FI)))
+ return false;
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ return true;
+}
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
+/// @param MI Instruction using or defining the current register.
+/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success, and MI will be erased.
+bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) {
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i];
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit())
+ continue;
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
+ if (!FoldMI)
+ return false;
+ LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ if (!LoadMI)
+ VRM.addSpillSlotUse(StackSlot, FoldMI);
+ MI->eraseFromParent();
+ DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+ ++NumFolded;
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
+ LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+ ++NumReloads;
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex Idx, MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+ ++NumSpills;
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (Writes) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(MI, Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
+ NewLI.markNotSpillable();
+
+ if (Reads)
+ insertReload(NewLI, Idx, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ MO.setReg(NewLI.reg);
+ if (MO.isUse()) {
+ if (!MI->isRegTiedToDefOperand(Ops[i]))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (Writes && hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ spillAroundUses(RegsToSpill[i]);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
+ MachineInstr *MI = RI.skipInstruction();) {
+ assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ VRM.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ Edit->eraseVirtReg(RegsToSpill[i], LIS);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = 0;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << MRI.getRegClass(edit.getReg())->getName()
+ << ':' << edit.getParent() << "\nFrom original "
+ << LIS.getInterval(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, LIS, Loops);
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 000000000000..a09bb39f8336
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,166 @@
+//===-- InterferenceCache.cpp - Caching per-block interference -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ PhysRegEntries.assign(TRI->getNumRegs(), 0);
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate();
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate() {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
+ Aliases[i].second = Aliases[i].first->getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+ Aliases.clear();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
+ }
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ unsigned e = Aliases.size();
+ Iters.resize(e);
+ for (unsigned i = 0; i != e; ++i)
+ Iters[i].setMap(Aliases[i].first->getMap());
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ unsigned i = 0, e = Aliases.size();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ if (i == e || Aliases[i].first != LIU)
+ return false;
+ if (LIU->changedSince(Aliases[i].second))
+ return false;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
+ if (!PrevPos.isValid() || Start < PrevPos)
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].find(Start);
+ else
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].advanceTo(Start);
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ BlockInterference *BI = &Blocks[MBBNum];
+ for (;;) {
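+ // Scan forward through the function, caching blocks with no interference
+ // as we go, and stop at the first block that has some.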
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 000000000000..7f0a27a41baa
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,204 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "LiveIntervalUnion.h"
+
+namespace llvm {
+
+class InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ SlotIndexes *Indexes;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// Aliases - A LiveIntervalUnion pointer and tag for each alias of
+ /// PhysReg.
+ SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+
+ typedef LiveIntervalUnion::SegmentIter Iter;
+
+ /// Iters - An iterator for each alias.
+ SmallVector<Iter, 8> Iters;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate();
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use
+ // too much memory. Instead, a fixed number of cache entries is used in a
+ // round-robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry->get(MBBNum);
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..611886ff16a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,570 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Check whether the program already contains a function with this name,
+ // and insert a prototype if it does not.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
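+ // Mirror the four bytes: bytes 0 and 3 trade places via 24-bit shifts,
+ // bytes 1 and 2 via masked 8-bit shifts, and the pieces are OR'd together.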
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
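+ // Same approach as the 32-bit case: shift each of the eight bytes to its
+ // mirrored position, mask off the stray bits, and OR everything together.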
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
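+ // The classic parallel bit-count masks: each step pairs up adjacent groups
+ // of 1, 2, 4, 8, 16, and 32 bits and adds them in place.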
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
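+ // Wider integers are handled 64 bits at a time; each word's partial count
+ // is accumulated into Count.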
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
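+/// The expansion ORs V with ever larger right-shifts of itself, smearing the
+/// highest set bit into all lower positions; the leading zeros then remain as
+/// the only set bits of the complement, which LowerCTPOP counts.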
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
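+ // For example, X = 0b01101000: X-1 = 0b01100111, so ~X & (X-1) = 0b00000111
+ // and ctpop gives 3, the number of trailing zeros.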
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different from eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 000000000000..f985af8ba83e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,498 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace llvm {
+ bool EnableFastISel;
+}
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable post-regalloc Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
+ cl::desc("Enable MC API logging"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ default:
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+}
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS)
+ : TargetMachine(T, Triple, CPU, FS) {
+ AsmInfo = T.createMCAsmInfo(Triple);
+}
+
+// Set the default code model for the JIT for a generic target.
+// FIXME: Is small right here? or .is64Bit() ? Large : Small?
+void LLVMTargetMachine::setCodeModelForJIT() {
+ setCodeModel(CodeModel::Small);
+}
+
+// Set the default code model for static compilation for a generic target.
+void LLVMTargetMachine::setCodeModelForStatic() {
+ setCodeModel(CodeModel::Small);
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ TargetAsmBackend *TAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
+ TAB = getTarget().createAsmBackend(getTargetTriple());
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, TAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
+ *Context);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
+ if (MCE == 0 || TAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
+ *Context, *TAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing, not for
+ // real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ if (EnableMCLogging)
+ AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ // Make sure the code model is set.
+ setCodeModelForStatic();
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ // Add common CodeGen passes.
+ MCContext *Ctx = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+
+ addCodeEmitter(PM, OptLevel, JCE);
+ PM.add(createGCInfoDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ raw_ostream &Out,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+
+ if (hasMCSaveTempLabels())
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
+ if (MCE == 0 || TAB == 0)
+ return true;
+
+ OwningPtr<MCStreamer> AsmStreamer;
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
+ *TAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ return false; // success!
+}
+
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
+/// emitting to assembly files or machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ switch (getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit; the cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(this));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+
+ if (OptLevel != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ TargetAsmInfo *TAI = new TargetAsmInfo(*this);
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ if (!DisableMachineLICM)
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ if (!DisableMachineSink)
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator(OptLevel));
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ if (!DisableSSC)
+ PM.add(createStackSlotColoringPass(false));
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ if (!DisablePostRAMachineLICM)
+ PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createLowerSubregsPass());
+ printAndVerify(PM, "After LowerSubregs");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 000000000000..0eb009ddac29
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,152 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
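+
+// As an illustration (node numbers and latencies are hypothetical): for
+//   SUnit A with NodeNum 3 and latency 7, SUnit B with NodeNum 9 and latency 4
+// the comparator returns false for (A, B) and true for (B, A), so pop() below
+// treats A, the node on the longer critical path, as the better candidate.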
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of
+  // successors for which this node is the sole unscheduled predecessor.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
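+
+// Sketch of the effect (names are hypothetical): if P is the single
+// unscheduled predecessor of SU and P is already in the queue, then
+//
+//   remove(P);
+//   push(P);
+//
+// recomputes NumNodesSolelyBlocking[P->NodeNum], which latency_sort reads via
+// getNumSolelyBlockNodes(), so P now wins ties against equal-latency nodes.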
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return NULL;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..5d38c83b49c2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,966 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
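+
+  // Small worked example (UserValues A, B, C are hypothetical): starting from
+  // three singleton classes, merge(merge(A, B), C) leaves A as the leader with
+  // the next-chain A -> C -> B and every member's leader field pointing at A,
+  // so getLeader() resolves each of them to A in a single step.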
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+      // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+  bool splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+  void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+  /// variable may have more than one corresponding DBG_VALUE instruction;
+  /// only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before the instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TM);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
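+
+// Worked example (locations are hypothetical): with locations
+// {0: %reg1, 1: <fi#3>, 2: %reg1}, coalesceLocation(2) finds Loc2 identical to
+// Loc0, erases Loc2, rewrites interval values 2 -> 0, and shifts any value
+// greater than 2 down by one so the location numbering stays dense.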
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
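+
+// For reference, the operand layout the check above expects (a representative
+// example, not taken from any particular test case):
+//
+//   DBG_VALUE %reg16, 0, !"x"
+//             operand 0: location, operand 1: byte offset, operand 2: variable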
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ continue;
+ }
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+ } while (!Todo.empty());
+}
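+
+// In outline, extendDef is a worklist walk over the dominator tree (block
+// names are hypothetical): starting from the def in BB0 it fills locInts up to
+// the next def, the end of VNI's live range, or the end of BB0; only when the
+// block end is reached does it push the start index of each of BB0's
+// dominator-tree children (BB1, BB2, ...) onto Todo and continue from there.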
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(LI->reg),
+ UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+ // Copies of the full value.
+ if (UI.getOperand().getSubReg() || !UI->isCopy())
+ continue;
+ MachineInstr *MI = &*UI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+    // arguments, and the argument registers are always call-clobbered. We are
+    // better off in the source register, which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ // Register locations are constrained to where the register value is live.
+ if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+ LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT);
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ } else
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << ((Value*)mf.getFunction())->getName()
+ << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = locations.size(); i; --i) {
+ unsigned LocNo = i - 1;
+ MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg() || Loc.getReg() != OldReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ Loc.substPhysReg(NewReg, *TRI);
+ else
+ Loc.substVirtReg(NewReg, SubIdx, *TRI);
+ coalesceLocation(LocNo);
+ }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ UserValue *UV = lookupVirtReg(OldReg);
+ if (!UV)
+ return;
+
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ mapVirtReg(NewReg, UV);
+ virtRegToEqClass.erase(OldReg);
+
+ do {
+ UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+ UV = UV->getNext();
+ } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), 0);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = NewRegs[i];
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+  // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+ VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..3ce3c398bd4b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,70 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
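+
+// A minimal usage sketch from a register allocator's perspective (the
+// surrounding pass boilerplate and register values are assumed, not part of
+// this interface):
+//
+//   LiveDebugVariables &LDV = getAnalysis<LiveDebugVariables>();
+//   LDV.renameRegister(OldVReg, NewVReg, 0);  // after coalescing
+//   LDV.splitRegister(OldVReg, NewIntervals); // after live range splitting
+//   LDV.emitDebugValues(&VRM);                // once rewriting is done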
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..cfade24b8d87
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,775 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = ranges.size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end)
+ Len = Mid;
+ else
+ I += Mid + 1, Len -= Mid + 1;
+ } while (Len);
+ return I;
+}
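+
+// For comparison, a sketch of the C++0x form alluded to above (illustrative
+// only; it assumes a lambda and a heterogeneous comparison are acceptable):
+//
+//   return std::upper_bound(begin(), end(), Pos,
+//                           [](SlotIndex P, const LiveRange &LR) {
+//                             return P < LR.end;
+//                           });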
+
+/// killedInRange - Return true if the interval has kills in [Start,End).
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
+ Ranges::const_iterator r =
+ std::lower_bound(ranges.begin(), ranges.end(), End);
+
+ // Now r points to the first interval with start >= End, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= End.
+ // r->end is the kill point.
+ return r->end >= Start && r->end < End;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty interval");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
+}
+
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as unused so
+/// it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ bool seenPHIDef = false;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ VNI->setHasPHIKill(false);
+ if (VNI->isPHIDef())
+ seenPHIDef = true;
+ }
+
+ // Recompute phi kill flags.
+ if (!seenPHIDef)
+ return;
+ for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (!VNI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
+ assert(PHIBB && "No basic block for phi-def");
+ for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
+ PE = PHIBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
+ if (KVNI)
+ KVNI->setHasPHIKill(true);
+ }
+ }
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = llvm::next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = llvm::next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ SlotIndex Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
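+
+// Worked example (indices and value numbers are made up): if the interval
+// already holds [4,12) and [16,32) for the same value number, adding [10,18)
+// lands in extendIntervalEndTo, which grows [4,12) to [4,18) and then notices
+// that the result touches [16,32), folding everything into a single [4,32)
+// range.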
+
+/// extendInBlock - If this interval is live before UseIdx in the basic
+/// block that starts at StartIdx, extend it to be live at UseIdx and return
+/// the value. If there is no live range before UseIdx, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) {
+ if (empty())
+ return 0;
+ iterator I = std::upper_bound(begin(), end(), UseIdx);
+ if (I == begin())
+ return 0;
+ --I;
+ if (I->end <= StartIdx)
+ return 0;
+ if (I->end <= UseIdx)
+ extendIntervalEndTo(I, UseIdx.getNextSlot());
+ return I->valno;
+}
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
+ assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+/// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i) {
+ if ((*i)->def == Idx)
+ return *i;
+ }
+
+ return 0;
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+  // Remember assignments because val# ids are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ ComputeJoinedWeight(Other);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; it will replace the value numbers of the overlapped
+/// live ranges with the specified value number.
+void LiveInterval::MergeValueInAsValue(
+ const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ if (I->valno != RHSValNo)
+ continue;
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
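+/// For illustration: if V1 owns [0,4) and V2 owns the adjacent [4,8), the
+/// result is a single [0,8) range owned by the surviving value number, which
+/// keeps V2's definition but may be renumbered to the smaller of the two ids.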
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Merge the relevant flags.
+ V2->mergeFlags(V1);
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
+
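+/// Copy - Overwrite this interval with the contents of RHS: the register
+/// allocation hint, the spill weight, fresh copies of all value numbers, and
+/// all live ranges (remapped onto the copied value numbers).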
+void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
+ VNInfo::Allocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ createValueCopy(VNI, VNInfoAllocator);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
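+/// getSize - Return the total SlotIndex distance covered by this interval,
+/// i.e. the sum of the sizes of its live ranges.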
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->start.distance(I->end);
+ return Sum;
+}
+
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+ // If either of these intervals was spilled, the weight is the
+ // weight of the non-spilled interval. This can only happen with
+ // iterative coalescers.
+
+ if (Other.weight != HUGE_VALF) {
+ weight += Other.weight;
+ }
+ else if (weight == HUGE_VALF &&
+ !TargetRegisterInfo::isPhysicalRegister(reg)) {
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining to spilled interval");
+ weight = Other.weight;
+ }
+ else {
+ // Otherwise the weight stays the same
+ // Remove this assert if you have an iterative coalescer
+ assert(0 && "Joining from spilled interval");
+ }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << PrintReg(reg, TRI);
+ if (weight != 0)
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->isUnused()) {
+ OS << "x";
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phidef";
+ if (vni->hasPHIKill())
+ OS << "-phikill";
+ if (vni->hasRedefByEC())
+ OS << "-ec";
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ dbgs() << *this << "\n";
+}
+
+
+void LiveRange::print(raw_ostream &os) const {
+ os << *this;
+}
+
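+/// Classify - Group the value numbers of LI into connected components. A
+/// phi-def is joined with the values live out of its predecessors, a normal
+/// def is joined with any value live immediately before it (a two-addr or
+/// early-clobber redef), and all unused values are lumped in with the last
+/// used value. Returns the number of classes. Illustrative use (names are
+/// hypothetical):
+/// unsigned NumComp = ConEQ.Classify(&LI);
+/// if (NumComp > 1) {
+/// // create NumComp-1 empty intervals, with LIV[0] = &LI
+/// ConEQ.Distribute(LIV, MRI);
+/// }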
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI =
+ LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
+
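+/// Distribute - After Classify has found more than one class, move each live
+/// range and value number belonging to class N > 0 from LIV[0] into LIV[N],
+/// and rewrite the register operands of the affected instructions to use the
+/// register of their new interval.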
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MO.isUse() && MO.isUndef())
+ continue;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ const VNInfo *VNI = LI.getVNInfoAt(Idx);
+ assert(VNI && "Interval not live at use.");
+ MO.setReg(LIV[getEqClass(VNI)]->reg);
+ }
+
+ // Move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && EqClass[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = EqClass[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && EqClass[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = EqClass[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 000000000000..9257191f7fc0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2171 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals , "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+
+ if (!StrongPHIElim) {
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ }
+
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ AU.addPreserved<ProcessImplicitDefs>();
+ AU.addRequired<ProcessImplicitDefs>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+ E = r2iMap_.end(); I != E; ++I)
+ delete I->second;
+
+ r2iMap_.clear();
+
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ while (!CloneMIs.empty()) {
+ MachineInstr *MI = CloneMIs.back();
+ CloneMIs.pop_back();
+ mf_->DeleteMachineInstr(MI);
+ }
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &mf_->getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ aa_ = &getAnalysis<AliasAnalysis>();
+ lv_ = &getAnalysis<LiveVariables>();
+ indexes_ = &getAnalysis<SlotIndexes>();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second->print(OS, tri_);
+ OS << "\n";
+ }
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ mf_->print(OS, indexes_);
+}
+
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+
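+/// conflictsWithPhysReg - Return true if the live interval li, which is
+/// expected to cover a single range within one basic block, contains an
+/// instruction with an operand that maps to a physical register overlapping
+/// reg (copies to or from li.reg are ignored). Anything more complicated,
+/// such as multiple ranges or a range crossing a block boundary or
+/// terminator, is conservatively reported as a conflict.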
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ // We don't handle fancy stuff crossing basic block boundaries
+ if (li.ranges.size() != 1)
+ return true;
+ const LiveRange &range = li.ranges.front();
+ SlotIndex idx = range.start.getBaseIndex();
+ SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // Skip deleted instructions
+ MachineInstr *firstMI = getInstructionFromIndex(idx);
+ while (!firstMI && idx != end) {
+ idx = idx.getNextIndex();
+ firstMI = getInstructionFromIndex(idx);
+ }
+ if (!firstMI)
+ return false;
+
+ // Find last instruction in range
+ SlotIndex lastIdx = end.getPrevIndex();
+ MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+ while (!lastMI && lastIdx != idx) {
+ lastIdx = lastIdx.getPrevIndex();
+ lastMI = getInstructionFromIndex(lastIdx);
+ }
+ if (!lastMI)
+ return false;
+
+ // Range cannot cross basic block boundaries or terminators
+ MachineBasicBlock *MBB = firstMI->getParent();
+ if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+ return true;
+
+ MachineBasicBlock::const_iterator E = lastMI;
+ ++E;
+ for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Allow copies to and from li.reg
+ if (MI.isCopy())
+ if (MI.getOperand(0).getReg() == li.reg ||
+ MI.getOperand(1).getReg() == li.reg)
+ continue;
+
+ // Check for operands using reg
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& mop = MI.getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
+ continue;
+ PhysReg = vrm.getPhys(PhysReg);
+ }
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
+ }
+ }
+
+ // No conflicts found.
+ return false;
+}
+
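+/// conflictsWithAliasRef - Return true if any instruction covered by li's
+/// live ranges (ignoring instructions in JoinedCopies) has an operand using
+/// a physical register that overlaps Reg.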
+bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
+ SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (SlotIndex index = I->start.getBaseIndex(),
+ end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+ index != end;
+ index = index.getNextIndex()) {
+ MachineInstr *MI = getInstructionFromIndex(index);
+ if (!MI)
+ continue; // skip deleted instructions
+
+ if (JoinedCopies.count(MI))
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned PhysReg = MO.getReg();
+ if (PhysReg == 0 || PhysReg == Reg ||
+ TargetRegisterInfo::isVirtualRegister(PhysReg))
+ continue;
+ if (tri_->regsOverlap(Reg, PhysReg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
+ unsigned Reg = MI.getOperand(MOIdx).getReg();
+ for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == Reg && MO.isDef()) {
+ assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
+ MI.getOperand(MOIdx).getSubReg() &&
+ (MO.getSubReg() || MO.isImplicit()));
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isPartialRedef - Return true if the specified def at the specific index is
+/// partially re-defining the specified live interval. A common case of this is
+/// a definition of the sub-register.
+bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+ LiveInterval &interval) {
+ if (!MO.getSubReg() || MO.isEarlyClobber())
+ return false;
+
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ if (DefMI != 0) {
+ return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
+ }
+ return false;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval &interval) {
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+ if (interval.empty()) {
+ // Get the Idx of the defining instructions.
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ // Earlyclobbers move back one, so that they overlap the live range
+ // of inputs.
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+
+ // Make sure the first definition is not a partial redefinition. Add an
+ // <imp-def> of the full register.
+ if (MO.getSubReg())
+ mi->addRegisterDefined(interval.reg);
+
+ MachineInstr *CopyMI = NULL;
+ if (mi->isCopyLike()) {
+ CopyMI = mi;
+ }
+
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ SlotIndex killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ else
+ killIdx = defIndex.getStoreIndex();
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << "\n");
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
+ DEBUG(dbgs() << " +" << NewLR);
+ interval.addRange(NewLR);
+
+ bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+ if (PHIJoin) {
+ // A phi join register is killed at the end of the MBB and revived as a new
+ // valno in the killing blocks.
+ assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+ DEBUG(dbgs() << " phi-join");
+ ValNo->setHasPHIKill(true);
+ } else {
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ SlotIndex Start = getMBBStartIdx(Kill->getParent());
+ SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+ // Create an interval with a NEW value number. Note that this value
+ // number isn't actually defined by an instruction, weird huh? :)
+ if (PHIJoin) {
+ assert(getInstructionFromIndex(Start) == 0 &&
+ "PHI def index points at actual instruction.");
+ ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+ ValNo->setIsPHIDef(true);
+ }
+ LiveRange LR(Start, killIdx, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
+
+ } else {
+ if (MultipleDefsBySameMI(*mi, MOIdx))
+ // Multiple defs of the same virtual register by the same instruction.
+ // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+ // This is likely due to elimination of REG_SEQUENCE instructions. Return
+ // here since there is nothing to do.
+ return;
+
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+ // def-and-use register operands.
+
+ // It may also be partial redef like this:
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ bool PartReDef = isPartialRedef(MIIdx, MO, interval);
+ if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+ // need to take the LiveRegion that defines this register and split it
+ // into two values.
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ RedefIndex = MIIdx.getUseIndex();
+
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ VNInfo *OldValNo = OldLR->valno;
+ SlotIndex DefIndex = OldValNo->def.getDefIndex();
+
+ // Delete the previous value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
+
+ // Value#0 is now defined by the 2-addr instruction.
+ OldValNo->def = RedefIndex;
+ OldValNo->setCopy(0);
+
+ // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
+ if (PartReDef && mi->isCopyLike())
+ OldValNo->setCopy(&*mi);
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DEBUG(dbgs() << " replace range with " << LR);
+ interval.addRange(LR);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (MO.isDead())
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ OldValNo));
+
+ DEBUG({
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
+ });
+ } else if (lv_->isPHIJoin(interval.reg)) {
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+
+ SlotIndex defIndex = MIIdx.getDefIndex();
+ if (MO.isEarlyClobber())
+ defIndex = MIIdx.getUseIndex();
+
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ if (mi->isCopyLike())
+ CopyMI = mi;
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ SlotIndex killIndex = getMBBEndIdx(mbb);
+ LiveRange LR(defIndex, killIndex, ValNo);
+ interval.addRange(LR);
+ ValNo->setHasPHIKill(true);
+ DEBUG(dbgs() << " phi-join +" << LR);
+ } else {
+ llvm_unreachable("Multiply defined register");
+ }
+ }
+
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ LiveInterval &interval,
+ MachineInstr *CopyMI) {
+ // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex.getDefIndex();
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ start = MIIdx.getUseIndex();
+ SlotIndex end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ // For earlyclobbers, the defSlot was pushed back one; the extra
+ // advance below compensates.
+ if (MO.isDead()) {
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex = baseIndex.getNextIndex();
+ while (++mi != MBB->end()) {
+
+ if (mi->isDebugValue())
+ continue;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = baseIndex.getDefIndex();
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that
+ // defines it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex = baseIndex.getNextIndex();
+ }
+
+ // The only case where we should have a dead physreg here without a killing
+ // instruction or other evidence that it's dead is when it is live-in to the
+ // function and never used. Another possible case is that the implicit use of
+ // the physical register has been deleted by the two-address pass.
+ end = start.getStoreIndex();
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ VNInfo *ValNo = interval.getVNInfoAt(start);
+ bool Extend = ValNo != 0;
+ if (!Extend)
+ ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (Extend && MO.isEarlyClobber())
+ ValNo->setHasRedefByEC(true);
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
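+/// handleRegisterDef - Update the live interval for the register defined by
+/// operand MO of MI, dispatching to the virtual or physical register handler
+/// as appropriate.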
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ SlotIndex MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else {
+ MachineInstr *CopyMI = NULL;
+ if (MI->isCopyLike())
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ }
+}
+
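+/// handleLiveInRegister - Add a live range for a register that is live in to
+/// MBB, running from MIIdx to its first kill or redefinition in the block, or
+/// through the whole block if neither is found (alias live-ins that are never
+/// used are instead marked dead). The value is created as a phi-def since no
+/// defining instruction is visible here.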
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ SlotIndex MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
+
+ // Look for kills; if the register reaches a def before it's killed, it
+ // shouldn't be considered a live-in.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ // Skip over DBG_VALUE at the start of the MBB.
+ if (mi != E && mi->isDebugValue()) {
+ while (++mi != E && mi->isDebugValue())
+ ;
+ if (mi == E)
+ // MBB is empty except for DBG_VALUE's.
+ return;
+ }
+
+ SlotIndex baseIndex = MIIdx;
+ SlotIndex start = baseIndex;
+ if (getInstructionFromIndex(baseIndex) == 0)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+ SlotIndex end = baseIndex;
+ bool SeenDefUse = false;
+
+ while (mi != E) {
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DEBUG(dbgs() << " killed");
+ end = baseIndex.getDefIndex();
+ SeenDefUse = true;
+ break;
+ } else if (mi->definesRegister(interval.reg, tri_)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DEBUG(dbgs() << " dead");
+ end = start.getStoreIndex();
+ SeenDefUse = true;
+ break;
+ }
+
+ while (++mi != E && mi->isDebugValue())
+ // Skip over DBG_VALUE.
+ ;
+ if (mi != E)
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+ }
+
+ // Live-in register might not be used at all.
+ if (!SeenDefUse) {
+ if (isAlias) {
+ DEBUG(dbgs() << " dead");
+ end = MIIdx.getStoreIndex();
+ } else {
+ DEBUG(dbgs() << " live through");
+ end = getMBBEndIdx(MBB);
+ }
+ }
+
+ SlotIndex defIdx = getMBBStartIdx(MBB);
+ assert(getInstructionFromIndex(defIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *vni =
+ interval.getNextValue(defIdx, 0, VNInfoAllocator);
+ vni->setIsPHIDef(true);
+ LiveRange LR(start, end, vni);
+
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+/// computeIntervals - Compute the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j) where 1 <= i <= j < N for
+/// which a variable is live.
+void LiveIntervals::computeIntervals() {
+ DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ SmallVector<unsigned, 8> UndefUses;
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (MBB->empty())
+ continue;
+
+ // Track the index of the current machine instr.
+ SlotIndex MIIndex = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << ":\t\t# derived from " << MBB->getName() << "\n");
+
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+
+ // Skip over empty initial indices.
+ if (getInstructionFromIndex(MIIndex) == 0)
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+
+ for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+ MI != miEnd; ++MI) {
+ DEBUG(dbgs() << MIIndex << "\t" << *MI);
+ if (MI->isDebugValue())
+ continue;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ // handle register defs - build intervals
+ if (MO.isDef())
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ else if (MO.isUndef())
+ UndefUses.push_back(MO.getReg());
+ }
+
+ // Move to the next instr slot.
+ MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ }
+ }
+
+ // Create empty intervals for registers defined by implicit_def's (except
+ // for those implicit_defs that define values which are live out of their
+ // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+ unsigned UndefReg = UndefUses[i];
+ (void)getOrCreateInterval(UndefReg);
+ }
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
+ return NewLI;
+}
+
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
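+/// A typical calling pattern (illustrative only; LIS stands for this
+/// LiveIntervals analysis, LI for a virtual register's interval, and the
+/// exact cleanup is up to the caller):
+///   SmallVector<MachineInstr*, 8> Dead;
+///   if (LIS->shrinkToUses(&LI, &Dead)) {
+///     // The interval may now consist of several connected components.
+///   }
+///   for (unsigned i = 0, e = Dead.size(); i != e; ++i) {
+///     LIS->RemoveMachineInstrFromMaps(Dead[i]);
+///     Dead[i]->eraseFromParent();
+///   }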
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can't only shrink physical registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ // We may eliminate PHI values, so recompute PHIKill flags.
+ VNI->setHasPHIKill(false);
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+
+ // A use tied to an early-clobber def ends at the load slot and isn't caught
+ // above. Catch it here instead. This probably only ever happens for inline
+ // assembly.
+ if (VNI->def.isUse())
+ if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
+ WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ }
+
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ (void)ExtVNI;
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ VNInfo *PVNI = li->getVNInfoAt(Stop);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (PVNI) {
+ PVNI->setHasPHIKill(true);
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ bool CanSeparate = false;
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ CanSeparate = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+ MachineBasicBlock *mbb) const {
+ const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+ // If li is not live into a landing pad, we can insert spill code before the
+ // first terminator.
+ if (!lpad || !isLiveInToMBB(li, lpad))
+ return mbb->getFirstTerminator();
+
+ // When there is a landing pad, spill code must go before the call instruction
+ // that can throw.
+ MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+ while (I != B) {
+ --I;
+ if (I->getDesc().isCall())
+ return I;
+ }
+ // The block contains no calls that can throw, so use the first terminator.
+ return mbb->getFirstTerminator();
+}
+
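+/// addKillFlags - Mark the instruction at the end of each live range of every
+/// virtual register as killing that register. Range ends at block boundaries
+/// (LOAD slots) are skipped.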
+void LiveIntervals::addKillFlags() {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (mri_->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = I->second;
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A LOAD index indicates an MBB edge.
+ if (RI->end.isLoad())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !allocatableRegs_[Reg])
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ SlotIndex UseIdx) const {
+ VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+ return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (!tii_->isTriviallyReMaterializable(MI, aa_))
+ return false;
+
+ // Target-specific code can mark an instruction as being rematerializable
+ // if it has one virtual reg use, though it had better be something like
+ // a PIC base register which is likely to be live everywhere.
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+ ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ SlotIndex UseIdx = getInstructionIndex(UseMI);
+ if (li.getVNInfoAt(UseIdx) != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ if (SpillIs)
+ for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
+ if (ImpUse == (*SpillIs)[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, 0, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ if (!ReMatDefMI)
+ return false;
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into ith operand of specified
+/// MI. If it is successful, MI is updated with the newly created MI and
+/// true is returned.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ SlotIndex InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->isImplicitDef()) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two address instruction is when
+ // it's folding reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ ReplaceMachineInstrInMaps(MI, fmi);
+ MI->eraseFromParent();
+ MI = fmi;
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
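+/// intervalIsInOneMBB - Return true if all live ranges of li are contained in
+/// a single basic block.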
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+ MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb == 0)
+ return false;
+
+ for (++itr; itr != li.ranges.end(); ++itr) {
+ MachineBasicBlock *mbb2 =
+ indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+ if (mbb2 != mbb)
+ return false;
+ }
+
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+ // There is an implicit use. That means one of the other operands is
+ // being remat'ed and the remat'ed instruction has li.reg as a
+ // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!vrm.isReMaterialized(Reg))
+ continue;
+ MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+ MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+ if (UseMO)
+ UseMO->setReg(NewVReg);
+ }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+ bool TrySplit, SlotIndex index, SlotIndex end,
+ MachineInstr *MI,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool CanFold = false;
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned Reg = mop.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (Reg != li.reg)
+ continue;
+
+ bool TryFold = !DefIsReMat;
+ bool FoldSS = true; // Default behavior unless it's a remat.
+ int FoldSlot = Slot;
+ if (DefIsReMat) {
+ // If this is the rematerializable definition MI itself and
+ // all of its uses are rematerialized, simply delete it.
+ if (MI == ReMatOrigDefMI && CanDelete) {
+ DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
+ << *MI << '\n');
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // If def for this use can't be rematerialized, then try folding.
+ // If def is rematerializable and it's a load, also try folding.
+ TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+ if (isLoad) {
+ // Try fold loads (from stack slot, constant pool, etc.) into uses.
+ FoldSS = isLoadSS;
+ FoldSlot = LdSlot;
+ }
+ }
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+ SmallVector<unsigned, 2> Ops;
+ tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+ // Create a new virtual register for the spill interval.
+ // Create the new register now so we can map the fold instruction
+ // to the new register so when it is unfolded we get the correct
+ // answer.
+ bool CreatedNewVReg = false;
+ if (NewVReg == 0) {
+ NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ CreatedNewVReg = true;
+
+ // The new virtual register should get the same allocation hints as the
+ // old one.
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+ if (Hint.first || Hint.second)
+ mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
+ }
+
+ if (!TryFold)
+ CanFold = false;
+ else {
+ // Do not fold load / store here if we are splitting. We'll find an
+ // optimal point to insert a load / store later.
+ if (!TrySplit) {
+ if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, FoldSS, FoldSlot, NewVReg)) {
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+
+ if (FoldSS) {
+ // We need to give the new vreg the same stack slot as the
+ // spilled interval.
+ vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+ }
+
+ HasUse = false;
+ HasDef = false;
+ CanFold = false;
+ if (isNotInMIMap(MI))
+ break;
+ goto RestartInstruction;
+ }
+ } else {
+ // We'll try to fold it later if it's profitable.
+ CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+ }
+ }
+
+ mop.setReg(NewVReg);
+ if (mop.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+ // Reuse NewVReg for other reads.
+ bool HasEarlyClobber = false;
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+ MachineOperand &mopj = MI->getOperand(Ops[j]);
+ mopj.setReg(NewVReg);
+ if (mopj.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+ if (mopj.isEarlyClobber())
+ HasEarlyClobber = true;
+ }
+
+ if (CreatedNewVReg) {
+ if (DefIsReMat) {
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
+ if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+ // Each valnum may have its own remat id.
+ ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+ } else {
+ vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+ }
+ if (!CanDelete || (HasUse && HasDef)) {
+ // If this is a two-addr instruction then its use operands are
+ // rematerializable but its def is not. It should be assigned a
+ // stack slot.
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else {
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else if (HasUse && HasDef &&
+ vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+ // If this interval hasn't been assigned a stack slot (because earlier
+ // def is a deleted remat def), do it now.
+ assert(Slot != VirtRegMap::NO_STACK_SLOT);
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI.
+ if (DefIsReMat && ImpUse)
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+ // Create a new register interval for this spill / remat.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (CreatedNewVReg) {
+ NewLIs.push_back(&nI);
+ MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+ if (TrySplit)
+ vrm.setIsSplitFromReg(NewVReg, li.reg);
+ }
+
+ if (HasUse) {
+ if (CreatedNewVReg) {
+ LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ } else {
+ // Extend the split live interval to this def / use.
+ SlotIndex End = index.getDefIndex();
+ LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+ nI.getValNumInfo(nI.getNumValNums()-1));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+ }
+ if (HasDef) {
+ // An early clobber starts at the use slot, except for an early clobber
+ // tied to a use operand (yes, that is a thing).
+ LiveRange LR(HasEarlyClobber && !HasUse ?
+ index.getUseIndex() : index.getDefIndex(),
+ index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+ DEBUG(dbgs() << " +" << LR);
+ nI.addRange(LR);
+ }
+
+ DEBUG({
+ dbgs() << "\t\t\t\tAdded new interval: ";
+ nI.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+ }
+ return CanFold;
+}
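+
+/// anyKillInMBBAfterIdx - Return true if the live interval li is killed
+/// anywhere in MBB after the slot index Idx.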
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+ const VNInfo *VNI,
+ MachineBasicBlock *MBB,
+ SlotIndex Idx) const {
+ return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+ struct RewriteInfo {
+ SlotIndex Index;
+ MachineInstr *MI;
+ RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
+ };
+
+ struct RewriteInfoCompare {
+ bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+ LiveInterval::Ranges::const_iterator &I,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ BitVector &SpillMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool AllCanFold = true;
+ unsigned NewVReg = 0;
+ SlotIndex start = I->start.getBaseIndex();
+ SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // First collect all the def / use in this live range that will be rewritten.
+ // Make sure they are sorted according to instruction index.
+ std::vector<RewriteInfo> RewriteMIs;
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineInstr *MI = &*ri;
+ MachineOperand &O = ri.getOperand();
+ ++ri;
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ int FI = isLoadSS ? LdSlot : (int)Slot;
+ if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ ReplaceMachineInstrInMaps(MI, NewDV);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ continue;
+ }
+ assert(!(O.isImplicit() && O.isUse()) &&
+ "Spilling register that's used as implicit use?");
+ SlotIndex index = getInstructionIndex(MI);
+ if (index < start || index >= end)
+ continue;
+
+ if (O.isUndef())
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons, e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and both target registers would overlap.
+ continue;
+ RewriteMIs.push_back(RewriteInfo(index, MI));
+ }
+ std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+ unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+ // Now rewrite the defs and uses.
+ for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+ RewriteInfo &rwi = RewriteMIs[i];
+ ++i;
+ SlotIndex index = rwi.Index;
+ MachineInstr *MI = rwi.MI;
+ // If MI defines and/or uses the same register multiple times, then there
+ // are multiple entries.
+ while (i != e && RewriteMIs[i].MI == MI) {
+ assert(RewriteMIs[i].Index == index);
+ ++i;
+ }
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (ImpUse && MI != ReMatDefMI) {
+ // Re-matting an instruction with virtual register use. Prevent interval
+ // from being spilled.
+ getInterval(ImpUse).markNotSpillable();
+ }
+
+ unsigned MBBId = MBB->getNumber();
+ unsigned ThisVReg = 0;
+ if (TrySplit) {
+ DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+ if (NVI != MBBVRegsMap.end()) {
+ ThisVReg = NVI->second;
+ // One common case:
+ // x = use
+ // ...
+ // ...
+ // def = ...
+ // = use
+ // It's better to start a new interval to avoid artificially
+ // extending the new interval.
+ if (MI->readsWritesVirtualRegister(li.reg) ==
+ std::make_pair(false,true)) {
+ MBBVRegsMap.erase(MBB->getNumber());
+ ThisVReg = 0;
+ }
+ }
+ }
+
+ bool IsNew = ThisVReg == 0;
+ if (IsNew) {
+ // This ends the previous live interval. If all of its def / use
+ // can be folded, give it a low spill weight.
+ if (NewVReg && TrySplit && AllCanFold) {
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+ AllCanFold = true;
+ }
+ NewVReg = ThisVReg;
+
+ bool HasDef = false;
+ bool HasUse = false;
+ bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+ index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+ ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+ if (!HasDef && !HasUse)
+ continue;
+
+ AllCanFold &= CanFold;
+
+ // Update weight of spill interval.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (!TrySplit) {
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.markNotSpillable();
+ continue;
+ }
+
+ // Keep track of the last def and first use in each MBB.
+ if (HasDef) {
+ if (MI != ReMatOrigDefMI || !CanDelete) {
+ bool HasKill = false;
+ if (!HasUse)
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
+ else {
+ // If this is a two-address instruction, then this index starts a new VNInfo.
+ const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
+ if (VNI)
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
+ }
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (!HasKill) {
+ if (SII == SpillIdxes.end()) {
+ std::vector<SRInfo> S;
+ S.push_back(SRInfo(index, NewVReg, true));
+ SpillIdxes.insert(std::make_pair(MBBId, S));
+ } else if (SII->second.back().vreg != NewVReg) {
+ SII->second.push_back(SRInfo(index, NewVReg, true));
+ } else if (index > SII->second.back().index) {
+ // If there is an earlier def and this is a two-address
+ // instruction, then it's not possible to fold the store (which
+ // would also fold the load).
+ SRInfo &Info = SII->second.back();
+ Info.index = index;
+ Info.canFold = !HasUse;
+ }
+ SpillMBBs.set(MBBId);
+ } else if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index) {
+ // There is an earlier def that's not killed (must be two-address).
+ // The spill is no longer needed.
+ SII->second.pop_back();
+ if (SII->second.empty()) {
+ SpillIdxes.erase(MBBId);
+ SpillMBBs.reset(MBBId);
+ }
+ }
+ }
+ }
+
+ if (HasUse) {
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ index > SII->second.back().index)
+ // There are use(s) following the last def; it's not safe to fold the spill.
+ SII->second.back().canFold = false;
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+ RestoreIdxes.find(MBBId);
+ if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+ // If we are splitting live intervals, only fold if it's the first
+ // use and there isn't another use later in the MBB.
+ RII->second.back().canFold = false;
+ else if (IsNew) {
+ // Only need a reload if there isn't an earlier def / use.
+ if (RII == RestoreIdxes.end()) {
+ std::vector<SRInfo> Infos;
+ Infos.push_back(SRInfo(index, NewVReg, true));
+ RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+ } else {
+ RII->second.push_back(SRInfo(index, NewVReg, true));
+ }
+ RestoreMBBs.set(MBBId);
+ }
+ }
+
+ // Update spill weight.
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ }
+
+ if (NewVReg && TrySplit && AllCanFold) {
+ // If all of its def / use can be folded, give it a low spill weight.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return false;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index &&
+ Restores[i].vreg == vr &&
+ Restores[i].canFold)
+ return true;
+ return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
+ unsigned vr, BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index && Restores[i].vreg == vr)
+ Restores[i].index = SlotIndex();
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ std::vector<LiveInterval*> &NewLIs) {
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineOperand &O = ri.getOperand();
+ MachineInstr *MI = &*ri;
+ ++ri;
+ if (MI->isDebugValue()) {
+ // Remove debug info for now.
+ O.setReg(0U);
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ continue;
+ }
+ if (O.isDef()) {
+ assert(MI->isImplicitDef() &&
+ "Register def was not rewritten?");
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ } else {
+ // This must be a use of an implicit_def, so it's not part of the live
+ // interval. Create a new empty live interval for it.
+ // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.setIsImplicitlyDefined(NewVReg);
+ NewLIs.push_back(&getOrCreateInterval(NewVReg));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg) {
+ MO.setReg(NewVReg);
+ MO.setIsUndef();
+ }
+ }
+ }
+ }
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
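+ // For a sense of scale (rounded values): d=1 gives a factor of about 10.1,
+ // d=2 about 87, d=3 about 657, and d=10 about 6.0e7, whereas a plain 10^d
+ // would already overflow a float around d=39.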
+ // By the way, powf() might be unavailable here, so for consistency we
+ // use pow(double, double) instead.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
+
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+ NewLIs[i]->weight =
+ normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
+
+ DEBUG({
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(dbgs(), tri_);
+ dbgs() << '\n';
+ });
+
+ // Each bit specifies whether a spill is required in the corresponding MBB.
+ BitVector SpillMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+ BitVector RestoreMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+ DenseMap<unsigned,unsigned> MBBVRegsMap;
+ std::vector<LiveInterval*> NewLIs;
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ unsigned NumValNums = li.getNumValNums();
+ SmallVector<MachineInstr*, 4> ReMatDefs;
+ ReMatDefs.resize(NumValNums, NULL);
+ SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+ ReMatOrigDefs.resize(NumValNums, NULL);
+ SmallVector<int, 4> ReMatIds;
+ ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+ BitVector ReMatDelete(NumValNums);
+ unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+ // Spilling a split live interval. It cannot be split any further. It's
+ // also guaranteed to be a single val# / range interval.
+ if (vrm.getPreSplitReg(li.reg)) {
+ vrm.setIsSplitFromReg(li.reg, 0);
+ // Unset the split kill marker on the last use.
+ SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx != SlotIndex()) {
+ MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+ assert(KillMI && "Last use disappeared?");
+ int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+ assert(KillOp != -1 && "Last use disappeared?");
+ KillMI->getOperand(KillOp).setIsKill(false);
+ }
+ vrm.removeKillPoint(li.reg);
+ bool DefIsReMat = vrm.isReMaterialized(li.reg);
+ Slot = vrm.getStackSlot(li.reg);
+ assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+ MachineInstr *ReMatDefMI = DefIsReMat ?
+ vrm.getReMaterializedMI(li.reg) : NULL;
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+ bool IsFirstRange = true;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ // If this is a split live interval with multiple ranges, it means there
+ // are two-address instructions that re-defined the value. Only the
+ // first def can be rematerialized!
+ if (IsFirstRange) {
+ // Note ReMatOrigDefMI has already been deleted.
+ rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ } else {
+ rewriteInstructionsForSpills(li, false, I, NULL, 0,
+ Slot, 0, false, false, false,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+ IsFirstRange = false;
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
+ return NewLIs;
+ }
+
+ bool TrySplit = !intervalIsInOneMBB(li);
+ if (TrySplit)
+ ++numSplits;
+ bool NeedStackSlot = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (VNI->isUnused())
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ bool dummy;
+ if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+ // Remember how to remat the def of this val#.
+ ReMatOrigDefs[VN] = ReMatDefMI;
+ // Original def may be modified so we have to make a copy here.
+ MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+ CloneMIs.push_back(Clone);
+ ReMatDefs[VN] = Clone;
+
+ bool CanDelete = true;
+ if (VNI->hasPHIKill()) {
+ // A kill is a phi node, so not all of its uses can be rematerialized.
+ // It must not be deleted.
+ CanDelete = false;
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ if (CanDelete)
+ ReMatDelete.set(VN);
+ } else {
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ }
+
+ // One stack slot per live interval.
+ if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+ if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+ Slot = vrm.assignVirt2StackSlot(li.reg);
+
+ // This case only occurs when the prealloc splitter has already assigned
+ // a stack slot to this vreg.
+ else
+ Slot = vrm.getStackSlot(li.reg);
+ }
+
+ // Create new intervals and rewrite defs and uses.
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+ MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+ bool DefIsReMat = ReMatDefMI != NULL;
+ bool CanDelete = ReMatDelete[I->valno->id];
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+ rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+
+ // Insert spills / restores if we are splitting.
+ if (!TrySplit) {
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
+ return NewLIs;
+ }
+
+ SmallPtrSet<LiveInterval*, 4> AddedKill;
+ SmallVector<unsigned, 2> Ops;
+ if (NeedStackSlot) {
+ int Id = SpillMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &spills = SpillIdxes[Id];
+ for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+ SlotIndex index = spills[i].index;
+ unsigned VReg = spills[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ bool FoundUse = false;
+ Ops.clear();
+ if (spills[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ Ops.push_back(j);
+ if (MO.isDef())
+ continue;
+ if (isReMat ||
+ (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
+ // MI has two-address uses of the same register. If the use
+ // isn't the first and only use in the BB, then we can't fold
+ // it. FIXME: Move this to rewriteInstructionsForSpills.
+ CanFold = false;
+ break;
+ }
+ FoundUse = true;
+ }
+ }
+ // Fold the store into the def if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+ Folded = true;
+ if (FoundUse) {
+ // Also folded uses, do not issue a load.
+ eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ }
+ nI.removeRange(index.getDefIndex(), index.getStoreIndex());
+ }
+ }
+
+ // Otherwise tell the spiller to issue a spill.
+ if (!Folded) {
+ LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+ bool isKill = LR->end == index.getStoreIndex();
+ if (!MI->registerDefIsDead(nI.reg))
+ // No need to spill a dead def.
+ vrm.addSpillPoint(VReg, isKill, MI);
+ if (isKill)
+ AddedKill.insert(&nI);
+ }
+ }
+ Id = SpillMBBs.find_next(Id);
+ }
+ }
+
+ int Id = RestoreMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+ SlotIndex index = restores[i].index;
+ if (index == SlotIndex())
+ continue;
+ unsigned VReg = restores[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ Ops.clear();
+ if (restores[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ if (MO.isDef()) {
+ // If this restore were to be folded, it would have been folded
+ // already.
+ CanFold = false;
+ break;
+ }
+ Ops.push_back(j);
+ }
+ }
+
+ // Fold the load into the use if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (!isReMat)
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+ else {
+ MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+ int LdSlot = 0;
+ bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ // If the rematerializable def is a load, also try to fold it.
+ if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+ Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, isLoadSS, LdSlot, VReg);
+ if (!Folded) {
+ unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+ if (ImpUse) {
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI and mark the register
+ // interval as unspillable.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.markNotSpillable();
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+ }
+ }
+ }
+ }
+ // If folding is not possible / failed, then tell the spiller to issue a
+ // load / rematerialization for us.
+ if (Folded)
+ nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+ else
+ vrm.addRestorePoint(VReg, MI);
+ }
+ Id = RestoreMBBs.find_next(Id);
+ }
+
+ // Finalize intervals: add kills, finalize spill weights, and filter out
+ // dead intervals.
+ std::vector<LiveInterval*> RetNewLIs;
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+ LiveInterval *LI = NewLIs[i];
+ if (!LI->empty()) {
+ if (!AddedKill.count(LI)) {
+ LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+ SlotIndex LastUseIdx = LR->end.getBaseIndex();
+ MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+ int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+ assert(UseIdx != -1);
+ if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+ LastUse->getOperand(UseIdx).setIsKill();
+ vrm.addKillPoint(LI->reg, LastUseIdx);
+ }
+ }
+ RetNewLIs.push_back(LI);
+ }
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ normalizeSpillWeights(RetNewLIs);
+ return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+ if (allocatableRegs_[*AS] && hasInterval(*AS))
+ return true;
+ return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+ // Find the largest super-register that is allocatable.
+ unsigned BestReg = Reg;
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+ unsigned SuperReg = *AS;
+ if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+ BestReg = SuperReg;
+ break;
+ }
+ }
+ return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+ unsigned PhysReg) const {
+ unsigned NumConflicts = 0;
+ const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index))
+ ++NumConflicts;
+ }
+ return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+ unsigned PhysReg, VirtRegMap &vrm) {
+ unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+ DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+ << " represented by " << tri_->getName(SpillReg) << '\n');
+
+ for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+ // If there are registers that alias PhysReg but are not sub-registers
+ // of the chosen representative super register, assert since we can't
+ // handle them yet.
+ assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+ tri_->isSuperRegister(*AS, SpillReg));
+
+ bool Cut = false;
+ SmallVector<unsigned, 4> PRegs;
+ if (hasInterval(SpillReg))
+ PRegs.push_back(SpillReg);
+ for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+ if (hasInterval(*SR))
+ PRegs.push_back(*SR);
+
+ DEBUG({
+ dbgs() << "Trying to spill:";
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+ dbgs() << ' ' << tri_->getName(PRegs[i]);
+ dbgs() << '\n';
+ });
+
+ SmallPtrSet<MachineInstr*, 8> SeenMIs;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (MI->isDebugValue() || SeenMIs.count(MI))
+ continue;
+ SeenMIs.insert(MI);
+ SlotIndex Index = getInstructionIndex(MI);
+ bool LiveReg = false;
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
+ unsigned PReg = PRegs[i];
+ LiveInterval &pli = getInterval(PReg);
+ if (!pli.liveAt(Index))
+ continue;
+ LiveReg = true;
+ SlotIndex StartIdx = Index.getLoadIndex();
+ SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
+ if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, tm_);
+ }
+ report_fatal_error(Msg.str());
+ }
+ pli.removeRange(StartIdx, EndIdx);
+ LiveReg = true;
+ }
+ if (!LiveReg)
+ continue;
+ DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+ vrm.addEmergencySpill(SpillReg, MI);
+ Cut = true;
+ }
+ return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ startInst, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..70003e7cc86a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,325 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << "LIU " << PrintReg(RepReg, TRI);
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ OS << '[' << start() << ';' << stop() << "):"
+ << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ OS << "Interferences with ";
+ LiveUnion->print(OS, TRI);
+ InterferenceResult IR = firstInterference();
+ while (isInterference(IR)) {
+ OS << " ";
+ IR.print(OS, TRI);
+ OS << '\n';
+ nextInterference(IR);
+ }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments. This may
+// be ok since most virtual registers have very few segments. If we had a data
+// structure that optimized MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+ // Search until reaching the end of the LiveUnion segments.
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ if (IR.VirtRegI == VirtRegEnd)
+ return;
+ while (IR.LiveUnionI.valid()) {
+ // Slowly advance the live virtual reg iterator until we surpass the next
+ // segment in LiveUnion.
+ //
+ // Note: If this is ever used for coalescing of fixed registers and we have
+ // a live vreg with thousands of segments, then change this code to use
+ // upperBound instead.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break; // Retain current (nonoverlapping) LiveUnionI
+
+ // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+ // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+ if (!IR.LiveUnionI.valid())
+ break;
+ if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "upperBound postcondition");
+ break;
+ }
+ }
+ if (!IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+ if (CheckedFirstInterference)
+ return FirstInterference;
+ CheckedFirstInterference = true;
+ InterferenceResult &IR = FirstInterference;
+ IR.LiveUnionI.setMap(LiveUnion->getMap());
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ IR.VirtRegI = VirtReg->end();
+ } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+ // VirtReg starts first, perform double binary search.
+ IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+ if (IR.VirtRegI != VirtReg->end())
+ IR.LiveUnionI.find(IR.VirtRegI->start);
+ } else {
+ // LiveUnion starts first, perform double binary search.
+ IR.LiveUnionI.find(VirtReg->beginIndex());
+ if (IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+ else
+ IR.VirtRegI = VirtReg->end();
+ }
+ findIntersection(FirstInterference);
+ assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+ && "Uninitialized iterator");
+ return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+ assert(isInterference(IR) && "iteration past end of interferences");
+
+ // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
+ // unique overlapping pairs.
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+ if (++IR.VirtRegI == VirtReg->end())
+ return false;
+ }
+ else {
+ if (!(++IR.LiveUnionI).valid()) {
+ IR.VirtRegI = VirtReg->end();
+ return false;
+ }
+ }
+ // Short-circuit findIntersection() if possible.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+ return true;
+
+ // Find the next intersection.
+ findIntersection(IR);
+ return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ InterferenceResult IR = firstInterference();
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentInterferingVReg = NULL;
+ if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+ // Advance the union's iterator to reach an unseen interfering vreg.
+ do {
+ if (IR.LiveUnionI.value() == RecentInterferingVReg)
+ continue;
+
+ if (!isSeenInterference(IR.LiveUnionI.value()))
+ break;
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+
+ } while ((++IR.LiveUnionI).valid());
+ if (!IR.LiveUnionI.valid())
+ break;
+
+ // Advance the VirtReg iterator until surpassing the next segment in
+ // LiveUnion.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break;
+
+ // Check for intersection with the union's segment.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+ if (!IR.LiveUnionI.value()->isSpillable())
+ SeenUnspillableVReg = true;
+
+ if (InterferingVRegs.size() == MaxInterferingRegs)
+ // Leave SeenAllInterferences set to false to indicate that at least one
+ // interference exists beyond those we collected.
+ return MaxInterferingRegs;
+
+ InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+ ++IR.LiveUnionI;
+
+ continue;
+ }
+ // VirtRegI may have advanced far beyond LiveUnionI,
+ // do a fast intersection test to "catch up"
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+ // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+ // overlaps.
+ IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+ Overlaps(LiveUnion->getMap(), Loop->getMap());
+ if (!Overlaps.valid())
+ return false;
+
+ // The loop is overlapping an LIU assignment. Check VirtReg as well.
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+ for (;;) {
+ if (VRI == VirtReg->end())
+ return false;
+ if (VRI->start < Overlaps.stop())
+ return true;
+
+ Overlaps.advanceTo(VRI->start);
+ if (!Overlaps.valid())
+ return false;
+ if (Overlaps.start() < VRI->end)
+ return true;
+
+ VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 000000000000..5e78d5e85029
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,264 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
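+
+// For example, segments [4,12) and [10,20) satisfy 4 < 20 && 10 < 12 and so
+// overlap, while [4,10) against [10,20) gives 10 < 10 == false: because both
+// comparisons are strict, segments that merely touch at an endpoint are not
+// considered interfering.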
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Maps SlotIndex intervals to the live virtual registers (LiveInterval*)
+ // that occupy them.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class InterferenceResult;
+ class Query;
+
+private:
+ const unsigned RepReg; // representative register number
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+ {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+ /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Remove all inserted virtual registers.
+ void clear() { Segments.clear(); ++Tag; }
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Cache a single interference test result in the form of two intersecting
+ /// segments. This allows efficiently iterating over the interferences. The
+ /// iteration logic is handled by LiveIntervalUnion::Query which may
+ /// filter interferences depending on the type of query.
+ class InterferenceResult {
+ friend class Query;
+
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+
+ // Internal ctor.
+ InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+ : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+ public:
+ // Public default ctor.
+ InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+ /// start - Return the start of the current overlap.
+ SlotIndex start() const {
+ return std::max(VirtRegI->start, LiveUnionI.start());
+ }
+
+ /// stop - Return the end of the current overlap.
+ SlotIndex stop() const {
+ return std::min(VirtRegI->end, LiveUnionI.stop());
+ }
+
+ /// interference - Return the register that is interfering here.
+ LiveInterval *interference() const { return LiveUnionI.value(); }
+
+ // Note: this interface provides raw access to the iterators because the
+ // result has no way to tell if it's valid to dereference them.
+
+ // Access the VirtReg segment.
+ LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+ // Access the LiveUnion segment.
+ const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+ bool operator==(const InterferenceResult &IR) const {
+ return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+ }
+ bool operator!=(const InterferenceResult &IR) const {
+ return !operator==(IR);
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+ };
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ InterferenceResult FirstInterference;
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag, UserTag;
+
+ public:
+ Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ UserTag = 0;
+ }
+
+ void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (UserTag == UTag && VirtReg == VReg &&
+ LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ UserTag = UTag;
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ bool isInterference(const InterferenceResult &IR) const {
+ if (IR.VirtRegI != VirtReg->end()) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "invalid segment iterators");
+ return true;
+ }
+ return false;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return isInterference(firstInterference()); }
+
+ // Get the first pair of interfering segments, or a noninterfering result.
+ // This initializes the FirstInterference cache.
+ const InterferenceResult &firstInterference();
+
+ // Treat the result as an iterator and advance to the next interfering pair
+ // of segments. Visiting each unique interfering pair means that the same
+ // VirtReg or LiveUnion segment may be visited multiple times.
+ bool nextInterference(InterferenceResult &IR) const;
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ /// checkLoopInterference - Return true if there is interference overlapping
+ /// Loop.
+ bool checkLoopInterference(MachineLoopRange*);
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+ private:
+ Query(const Query&); // DO NOT IMPLEMENT
+ void operator=(const Query&); // DO NOT IMPLEMENT
+
+ // Private interface for queries
+ void findIntersection(InterferenceResult &IR) const;
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
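As a rough illustration of how the pieces above fit together, here is a hedged sketch of the intended call pattern: a register allocator keeps one LiveIntervalUnion per physical register, merges assigned virtual registers into it with unify(), and uses a Query to test whether another virtual register can be assigned. Only the API declared above is used; the helper names, the eviction heuristic, and the surrounding allocator state are illustrative assumptions, not part of this change.

// Sketch only: helper names and thresholds are invented for illustration.
#include "LiveIntervalUnion.h"
using namespace llvm;

// Decide whether VirtReg could live in the physical register modeled by Union.
static bool canAssign(LiveInterval &VirtReg, LiveIntervalUnion &Union,
                      unsigned UserTag) {
  LiveIntervalUnion::Query Q;
  Q.init(UserTag, &VirtReg, &Union);   // bind the query to this (vreg, union) pair
  if (!Q.checkInterference())
    return true;                       // no overlap with already assigned vregs
  // Look at a handful of interfering vregs to judge whether eviction is cheap.
  Q.collectInterferingVRegs(8);
  return !Q.seenUnspillableVReg() && Q.interferingVRegs().size() <= 2;
}

static void assign(LiveInterval &VirtReg, LiveIntervalUnion &Union) {
  Union.unify(VirtReg);                // merge the vreg's segments into the union
}

static void unassign(LiveInterval &VirtReg, LiveIntervalUnion &Union) {
  Union.extract(VirtReg);              // remove them again, e.g. on eviction
}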
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..b385fb36bbf1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,327 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) {
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ VRM.grow();
+ VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+ newRegs_.push_back(&LI);
+ return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ scannedRemattable_ = true;
+ if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ return false;
+ remattable_.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+ E = parent_.vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(VNI, DefMI, tii, aa);
+ }
+ scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ if (!scannedRemattable_)
+ scanRemattable(lis, tii, aa);
+ return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx,
+ LiveIntervals &lis) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef())
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ continue;
+ // We cannot depend on virtual registers in uselessRegs_.
+ if (uselessRegs_)
+ for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
+ if ((*uselessRegs_)[ui]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &li = lis.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis) {
+ assert(scannedRemattable_ && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!remattable_.count(RM.ParentVNI))
+ return false;
+
+ // No defining instruction provided.
+ SlotIndex DefIdx;
+ if (RM.OrigMI)
+ DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ rematted_.insert(RM.ParentVNI);
+ return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getDefIndex();
+}
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+ if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineInstr *DefMI = 0, *UseMI = 0;
+
+ // Check that there is a single def and a single use.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->getDesc().canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, 0);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals &LIS, VirtRegMap &VRM,
+ const TargetInstrInfo &TII) {
+ SetVector<LiveInterval*,
+ SmallVector<LiveInterval*, 8>,
+ SmallPtrSet<LiveInterval*, 8> > ToShrink;
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty()) {
+ MachineInstr *MI = Dead.pop_back_val();
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Check for live intervals that may shrink
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if (MI->readsVirtualRegister(Reg) &&
+ (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+ LI.killedAt(Idx)))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI.reg);
+ LI.removeValNo(VNI);
+ if (LI.empty()) {
+ ToShrink.remove(&LI);
+ eraseVirtReg(Reg, LIS);
+ }
+ }
+ }
+ }
+
+ if (delegate_)
+ delegate_->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ continue;
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI->reg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues(LIS);
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ continue;
+ ++NumFracRanges;
+ bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+ SmallVector<LiveInterval*, 8> Dups(1, LI);
+ for (unsigned i = 1; i != NumComp; ++i) {
+ Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ if (delegate_)
+ delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ }
+ ConEQ.Distribute(&Dups[0], MRI);
+ }
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ LiveIntervals &LIS,
+ const MachineLoopInfo &Loops) {
+ VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ LiveInterval &LI = **I;
+ VRAI.CalculateRegClass(LI.reg);
+ VRAI.CalculateWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 000000000000..db6740c1fbcd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,206 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+public:
+ /// Callback methods for LiveRangeEdit owners.
+ struct Delegate {
+ /// Called immediately before erasing a dead machine instruction.
+ virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
+
+ /// Called when a virtual register is no longer used. Return false to defer
+ /// its deletion from LiveIntervals.
+ virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+
+ /// Called before shrinking the live range of a virtual register.
+ virtual void LRE_WillShrinkVirtReg(unsigned) {}
+
+ /// Called after cloning a virtual register.
+ /// This is used for new registers representing connected components of Old.
+ virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+
+ virtual ~Delegate() {}
+ };
+
+private:
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ Delegate *const delegate_;
+ const SmallVectorImpl<LiveInterval*> *uselessRegs_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<const VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<const VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+ /// foldAsLoad - If LI has a single use and a single def that can be folded as
+ /// a load, eliminate the register by folding the def into the use.
+ bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially; any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ Delegate *delegate = 0,
+ const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
+ : parent_(parent), newRegs_(newRegs),
+ delegate_(delegate),
+ uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()),
+ scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ ArrayRef<LiveInterval*> regs() const {
+ return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_);
+ }
+
+ /// FIXME: Temporary accessors until we can get rid of
+ /// LiveIntervals::AddIntervalsForSpills
+ SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
+ const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
+ return uselessRegs_;
+ }
+
+ /// createFrom - Create a new virtual register based on OldReg.
+ LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
+
+ /// create - Create a new register with the same class and original slot as
+ /// parent.
+ LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
+ return createFrom(getReg(), LIS, VRM);
+ }
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// checkRematerializable - Manually add VNI to the list of rematerializable
+ /// values if DefMI may be rematerializable.
+ bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
+ const TargetInstrInfo&, AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&,
+ bool Late = false);
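+
+ // A typical rematerialization sequence looks like the following sketch
+ // (illustrative only; Edit, MBB, MI, DestReg, UseIdx, ParentVNI, LIS, TII
+ // and TRI are assumed to be supplied by the caller):
+ //
+ //   LiveRangeEdit::Remat RM(ParentVNI);
+ //   if (Edit.canRematerializeAt(RM, UseIdx, false, LIS)) {
+ //     SlotIndex DefIdx =
+ //       Edit.rematerializeAt(MBB, MI, DestReg, RM, LIS, TII, TRI);
+ //     (void)DefIdx; // The new def is mapped; update liveness separately.
+ //   }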
+
+ /// markRematerialized - Explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(const VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(const VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+
+ /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
+ /// to erase it from LIS.
+ void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
+
+ /// eliminateDeadDefs - Try to delete machine instructions that are now dead
+ /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
+ /// and further dead defs to be eliminated.
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals&, VirtRegMap&,
+ const TargetInstrInfo&);
+
+ /// calculateRegClassAndHint - Recompute register class and hint for each new
+ /// register.
+ void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
+ const MachineLoopInfo&);
+};
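+
+// Example client (illustrative sketch only; ExampleClient and its members are
+// hypothetical and not part of LLVM). A splitter or spiller typically owns the
+// vector of new intervals, registers itself as the Delegate, and lets
+// eliminateDeadDefs() clean up after rewriting:
+//
+//   struct ExampleClient : LiveRangeEdit::Delegate {
+//     SmallVector<LiveInterval*, 8> NewRegs;
+//     void run(LiveInterval &Parent, LiveIntervals &LIS, VirtRegMap &VRM,
+//              const TargetInstrInfo &TII) {
+//       LiveRangeEdit Edit(Parent, NewRegs, this);
+//       // ... split or spill Parent, remembering now-dead instructions ...
+//       SmallVector<MachineInstr*, 8> Dead;
+//       Edit.eliminateDeadDefs(Dead, LIS, VRM, TII);
+//     }
+//     // Keep client-side data structures in sync with the edit.
+//     virtual void LRE_WillEraseInstruction(MachineInstr *MI) { /* forget MI */ }
+//   };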
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 000000000000..c75196a47210
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,82 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
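+ // Stack slots reuse the LiveInterval machinery: index2StackSlot folds the
+ // slot index into the register number space that LiveInterval expects.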
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << RC->getName() << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..20bad60dedda
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,770 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
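+//
+// Example query (illustrative sketch only; Reg and MBB are assumed to be
+// supplied by a client pass that declares AU.addRequired<LiveVariables>()):
+//
+//   LiveVariables &LV = getAnalysis<LiveVariables>();
+//   LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); // Reg must be virtual.
+//   if (LV.isLiveOut(Reg, *MBB))
+//     ; // Reg is live into, or killed in, some successor of MBB.
+//   else if (MachineInstr *Kill = VI.findKill(MBB))
+//     ; // Reg dies at Kill inside MBB.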
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(RegIdx);
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return 0;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ PartDefRegs.insert(DefReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PartDefRegs.insert(SubReg);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def, all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ }
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super-register; add an implicit def of Reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PhysRegUse[SubReg] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ unsigned SSReg = *SSRegs; ++SSRegs)
+ PhysRegUse[SSReg] = LastRefOrPartRef;
+ }
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ Live.insert(*SS);
+ } else {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ // If a register isn't itself defined, but all of the parts that make it up
+ // are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ ReservedRegisters = TRI->getReservedRegs(mf);
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to
+ // mark it as using all of the live-out values in the function.
+ // Things marked both call and return are tail calls; do not do this for
+ // them. The tail callee need not take the same registers as input
+ // that it produces as output, and there are dependencies for its input
+ // registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()
+ && !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register!");
+ HandlePhysRegUse(*I, Ret);
+
+ // Add live-out registers as implicit uses.
+ if (!Ret->readsRegister(*I))
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegDef[i] || PhysRegUse[i])
+ HandlePhysRegDef(i, 0, Defs);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If any are found, it is due to a bug in the instruction selector
+ // or some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ removed = true;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
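+ // PHI operands come in (incoming register, predecessor MBB) pairs starting
+ // at operand 1, so operand i is the register and operand i+1 its source
+ // block; hence the stride of 2.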
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ VarInfo &VI = getVarInfo(Reg);
+ if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..1318d6212497
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,359 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const {
+ return "Local Stack Slot Allocation";
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+ return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a slightly better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
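+ // For example, when the stack grows down, an 8-byte object with 8-byte
+ // alignment placed while Offset is 5 gives 5 + 8 = 13, which rounds up to
+ // 16 and is recorded below as local offset -16.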
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+ // Check if the relative offset from where the base register points to the
+ // target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = Fn.begin();
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 000000000000..7871ba9c17e4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,223 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass which runs after register
+// allocation that turns subreg insert/extract instructions into register
+// copies, as needed. This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersubregs"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct LowerSubregsInstructionPass : public MachineFunctionPass {
+ private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const {
+ return "Subregister lowering instruction pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI);
+ void TransferImplicitDefs(MachineInstr *MI);
+ };
+
+ char LowerSubregsInstructionPass::ID = 0;
+}
+
+FunctionPass *llvm::createLowerSubregsPass() {
+ return new LowerSubregsInstructionPass();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
+ unsigned DstReg,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyPhysReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for a case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI);
+ });
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (DstMO.isDead())
+ TransferDeadFlag(MI, DstMO.getReg(), TRI);
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** LOWERING SUBREG INSTRS **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ MachineInstr *MI = mi;
+ assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
+ assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ "EXTRACT_SUBREG should no longer appear");
+ if (MI->isSubregToReg()) {
+ MadeChange |= LowerSubregToReg(MI);
+ } else if (MI->isCopy()) {
+ MadeChange |= LowerCopy(MI);
+ }
+ mi = nmi;
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..8f0fb46879ac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,791 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
+ AddressTaken(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getSymbol - Return the MCSymbol for this basic block.
+///
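+/// For example, with a private-global prefix of ".L" this yields names of the
+/// form ".LBB<function number>_<block number>", such as ".LBB3_5".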
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) + "_" +
+ Twine(getNumber()));
+}
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ N->RemoveRegOperandsFromUseLists();
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ iterator I = begin();
+ while (I != end() && I->isPHI())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator I = end();
+ while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != end() && !I->getDesc().isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ iterator B = begin(), I = end();
+ while (I != B) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
+}
+
+void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ WriteAsOperand(OS, LBB, /*PrintType=*/false);
+ Comma = ", ";
+ }
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Live Ins:";
+ for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
+ OS << '\t';
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " BB#" << (*SI)->getNumber();
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ std::vector<unsigned>::iterator I =
+ std::find(LiveIns.begin(), LiveIns.end(), Reg);
+ assert(I != LiveIns.end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ }
+ } else {
+ // The block has a fallthrough conditional branch.
+ MachineBasicBlock *MBBA = *succ_begin();
+ MachineBasicBlock *MBBB = *llvm::next(succ_begin());
+ if (MBBA == TBB) std::swap(MBBB, MBBA);
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
+ }
+ }
+ }
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+ // The first time we see a non-zero weight, we know the Weights list is in
+ // use, so fill in 0 weights for all existing successors.
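+ // For example, after addSuccessor(A) and addSuccessor(B, 16), the lists are
+ // Successors == {A, B} and Weights == {0, 16}.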
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+ }
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ uint32_t weight = 0;
+ succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(SI);
+ weight = *WI;
+ }
+
+ // Update the successor information.
+ removeSuccessor(SI);
+ addSuccessor(New, weight);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ uint32_t weight = 0;
+
+ // If the Weights list is empty, we don't use it (optimization disabled).
+ if (!fromMBB->Weights.empty())
+ weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, weight);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
+
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ return empty() || !back().getDesc().isBarrier() ||
+ back().getDesc().isPredicable();
+ }
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+ // Avoid bugpoint weirdness: A block may end with a conditional branch that
+ // jumps to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "Splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+ updateTerminator();
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ // Update LiveVariables.
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (iterator I = end(), E = begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ continue;
+ LV->getVarInfo(Reg).Kills.push_back(I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+ MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+ if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
+ }
+
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ // The values of DestA and DestB frequently come from a call to the
+ // 'TargetInstrInfo::AnalyzeBranch' method, and their initial values are
+ // interpreted accordingly:
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // with a conditional branch that falls through to a successor (DestB).
+ // 4. If DestA and DestB are set and isCond is true, then the block ends with a
+ // conditional branch followed by an unconditional branch. DestA is the
+ // 'true' destination and DestB is the 'false' destination.
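+ //
+ // For example, if branch folding turned a conditional branch into an
+ // unconditional one, the old conditional target may still be listed as a
+ // successor; such stale edges are removed below (unless they are landing
+ // pads).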
+
+ bool Changed = false;
+
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
+
+ if (DestA == 0 && DestB == 0) {
+ // Block falls through to successor.
+ DestA = FallThru;
+ DestB = FallThru;
+ } else if (DestA != 0 && DestB == 0) {
+ if (isCond)
+ // Block ends in conditional jump that falls through to successor.
+ DestB = FallThru;
+ } else {
+ assert(DestA && DestB && isCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
+ }
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (!SeenMBBs.insert(MBB) ||
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ // This is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ Changed = true;
+ } else {
+ ++SI;
+ }
+ }
+
+ return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+ DebugLoc DL;
+ MachineBasicBlock::iterator E = end();
+ if (MBBI != E) {
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MachineBasicBlock::iterator MBBI2 = MBBI;
+ while (MBBI2 != E && MBBI2->isDebugValue())
+ MBBI2++;
+ if (MBBI2 != E)
+ DL = MBBI2->getDebugLoc();
+ }
+ return DL;
+}
+
+/// getSuccWeight - Return the weight of the edge from this block to succ.
+///
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+ if (Weights.empty())
+ return 0;
+
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return the weight iterator corresponding to the given
+/// successor iterator.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp
new file mode 100644
index 000000000000..893a320a6a63
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequency.cpp
@@ -0,0 +1,59 @@
+//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequency::ID = 0;
+
+
+MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequency::~MachineBlockFrequency() {
+ delete MBFI;
+}
+
+void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. Never returns 0; the value is
+/// always positive. Note that the initial frequency is 1024, so the value
+/// should not be interpreted on its own, only in comparison with other block
+/// frequencies. We do this to avoid using floating point.
+///
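+/// For example, a block that executes roughly eight times per entry into the
+/// function would typically report a frequency near 8 * 1024 = 8192.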
+uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+ return MBFI->getBlockFreq(MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..c13fa6bc5333
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+uint32_t
+MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
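+ // For example, weights {7, 1} make the 7-weight edge hot (7*5 = 35 > 8*4 =
+ // 32), while weights {3, 1} do not (3*5 = 15 < 4*4 = 16).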
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 000000000000..3a60a37af443
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,535 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg-referencing common subexprs eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ MachineRegisterInfo *MRI;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ virtual void releaseMemory() {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ const unsigned LookAheadLimit;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
+
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ bool PerformCSE(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+
+FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // Only coalesce single-use copies. This ensures the copy will be
+ // deleted.
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() != MBB)
+ continue;
+ if (!DefMI->isCopy())
+ continue;
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ bool SeenDef = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // See a def of Reg (or an alias) before encountering any use, it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers).
+/// The physical registers it references (other than trivially dead defs),
+/// together with their aliases, are collected in PhysRefs.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before LiveVariables. We can scan forward a
+ // few instructions and check if it is obviously dead.
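+ // A typical example is a flags register such as EFLAGS defined by an
+ // instruction whose flag results are never read.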
+ if (MO.isDef() &&
+ (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+ continue;
+ PhysRefs.insert(Reg);
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ PhysRefs.insert(*Alias);
+ }
+
+ return !PhysRefs.empty();
+}
+
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ if (CSMI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ return true;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MCID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, it is
+ // safe to CSE it like any other side-effect-free instruction.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI) {
+ // FIXME: Heuristics that work around the lack of live range splitting.
+
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computations to be spilled.
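+ // For example, rematerializing a cheap immediate in each block is usually
+ // better than keeping a single copy live across a long stretch of code.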
+ if (MI->getDesc().isAsCheapAsAMove()) {
+ MachineBasicBlock *CSBB = CSMI->getParent();
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristics #2: If the expression doesn't use a virtual register and the
+ // only uses of the redundant computation are copies, do not CSE.
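+ // Such expressions typically feed copies into physical argument registers;
+ // CSE would only extend the live range feeding those copies.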
+ bool HasVRegUse = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ // Ignore copies.
+ if (!Use->isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
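+ // Reusing a value that is only referenced by PHIs could otherwise extend
+ // its live range into blocks where it was not previously live.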
+ bool HasPHI = false;
+ SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ HasPHI |= Use->isPHI();
+ CSBBs.insert(Use->getParent());
+ }
+
+ if (!HasPHI)
+ return true;
+ return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ // Delete the scope before erasing the map entry; erase() invalidates SI.
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI->isCopyLike())
+ continue;
+ FoundCSE = VNT.count(MI);
+ }
+ }
+
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->getDesc().isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ SmallSet<unsigned,8> PhysRefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ FoundCSE = false;
+
+ // ... Unless the common subexpression is local and none of the physical
+ // registers it references are clobbered between it and MI.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ FoundCSE = true;
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+ if (OldReg == NewReg)
+ continue;
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DoCSE = false;
+ break;
+ }
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+ MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+ MRI->clearKillFlags(CSEPairs[i].second);
+ }
+ MI->eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ }
+ CSEPairs.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose children are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ OpenChildren[Node] = NumChildren;
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..04c8ecbf9bdc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,59 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+}
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..cd2515652831
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,744 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Config/config.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
+ if (Fn->hasFnAttr(Attribute::StackAlignment))
+ FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ Fn->getAttributes().getFnAttributes()));
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+ Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ TM.getTargetLowering()->getPrefFunctionAlignment());
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ InstructionRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+ FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+ ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
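+
+// Editorial sketch, not part of the upstream file: a transformation that
+// splices in a new block is expected to renumber from that point so the MBB
+// numbers stay dense, e.g. assuming MachineFunction &MF and a freshly created
+// block NewMBB already linked into the function:
+//
+//   MF.RenumberBlocks(NewMBB);  // renumbers NewMBB and every block after it
+//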
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(MCID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ MI->~MachineInstr();
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
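+
+// Editorial sketch, not part of the upstream file: a pass that splits a
+// combined load/store could keep only the load memory references on the new
+// load instruction, assuming MachineInstr *MI (the original) and *LoadMI:
+//
+//   std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> P =
+//     MF.extractLoadMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+//   LoadMI->setMemRefs(P.first, P.second);
+//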
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+void MachineFunction::dump() const {
+ print(dbgs());
+}
+
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << Fn->getName() << ":\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << PrintReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << PrintReg(I->second, TRI);
+ if (llvm::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+ if (RegInfo && !RegInfo->liveout_empty()) {
+ OS << "Function Live Outs:";
+ for (MachineRegisterInfo::liveout_iterator
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
+ OS << '\n';
+ }
+
+ for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ OS << '\n';
+ BB->print(OS, Indexes);
+ }
+
+ OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
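+
+// Editorial sketch, not part of the upstream file: calling-convention
+// lowering typically maps each incoming physical argument register to a
+// virtual register this way, assuming PhysReg and RC come from the target's
+// argument-assignment tables:
+//
+//   unsigned VReg = MF.addLiveIn(PhysReg, RC);
+//   // VReg now carries the value of PhysReg on function entry.
+//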
+
+/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ assert(JumpTableInfo && "No jump tables");
+
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+ const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
+
+ const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
+ MAI.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
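+
+// Editorial note: with a Darwin-style private prefix "L", function number 1
+// and jump-table index 0, the code above produces the symbol "LJTI1_0"; with
+// isLinkerPrivate and an "l" prefix it would be "lJTI1_0".
+//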
+
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/false, false));
+ return -++NumFixedObjects;
+}
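+
+// Editorial sketch, not part of the upstream file: frame lowering for
+// incoming stack arguments creates fixed objects before any ordinary stack
+// objects, e.g. a 4-byte argument located 8 bytes above the incoming SP:
+//
+//   int FI = MFI->CreateFixedObject(4, 8, /*Immutable=*/true);
+//   // The first fixed object gets index -1, the second -2, and so on.
+//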
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+ assert(MBB && "MBB must be valid");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB must be part of a MachineFunction");
+ const TargetMachine &TM = MF->getTarget();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // The entry MBB always has all CSRs pristine.
+ if (MBB == &MF->front())
+ return BV;
+
+ // On other MBBs the saved CSRs are not pristine.
+ const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I)
+ BV.reset(I->getReg());
+
+ return BV;
+}
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ assert(0 && "Unknown jump table encoding!");
+ return ~0;
+}
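+
+// Editorial note: for EK_BlockAddress entries on a target with 8-byte
+// pointers, getEntrySize reports 8 and getEntryAlignment reports the pointer
+// ABI alignment; the 32-bit entry kinds always report a 4-byte size.
+//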
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getRelocationInfo();
+ return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const TargetData *TD) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
+ if (A->getType() == B->getType()) return false;
+
+ // For now, only support constants with the same size.
+ if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ return false;
+
+ // If a floating-point value and an integer value have the same encoding,
+ // they can share a constant-pool entry.
+ if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+ if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
+ return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+ if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+ if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
+ return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+ // Two vectors can share an entry if each pair of corresponding
+ // elements could.
+ if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
+ if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+ if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
+ return false;
+ for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
+ if (!CanShareConstantPoolEntry(AV->getOperand(i),
+ BV->getOperand(i), TD))
+ return false;
+ return true;
+ }
+
+ // TODO: Handle other cases.
+
+ return false;
+}
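+
+// Editorial note: as a concrete example, a ConstantFP holding float 1.0 (bit
+// pattern 0x3F800000) and a ConstantInt holding i32 0x3F800000 have the same
+// store size and encoding, so the test above allows them to share one entry.
+//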
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. The caller must specify the minimum required alignment
+/// (in bytes) for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
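+
+// Editorial sketch, not part of the upstream file: callers normally request
+// an index using the constant's ABI alignment in bytes, assuming a
+// MachineConstantPool *CP, a Constant *C and the function's TargetData *TD:
+//
+//   unsigned Align = TD->getABITypeAlignment(C->getType());
+//   unsigned Idx = CP->getConstantPoolIndex(C, Align);
+//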
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..054c750c9f2b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,58 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI && "MMI not around yet??");
+ MMI->setModule(&M);
+ NextFnNum = 0;
+ return false;
+}
+
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++,
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..e5a491270a8c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,56 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all; they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreserved("iv-users");
+ AU.addPreserved("memdep");
+ AU.addPreserved("live-values");
+ AU.addPreserved("domtree");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("loops");
+ AU.addPreserved("lda");
+
+ FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 000000000000..2aaa798a02c1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,60 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ OS << "# " << Banner << ":\n";
+ MF.print(OS);
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
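+
+// Editorial sketch, not part of the upstream file: a codegen pipeline can
+// dump machine code at any point by scheduling this printer, assuming a
+// PassManagerBase &PM that is being populated:
+//
+//   PM.add(createMachineFunctionPrinterPass(dbgs(), "After instruction selection"));
+//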
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..143a29b08a1e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1743 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Value.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo. If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+ assert(isReg() && "Can only add reg operand to use lists");
+
+ // If the reginfo pointer is null, just explicitly null out our next/prev
+ // pointers, to ensure they are not garbage.
+ if (RegInfo == 0) {
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+ return;
+ }
+
+ // Otherwise, add this operand to the head of the registers use/def list.
+ MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+ // For SSA values, we prefer to keep the definition at the start of the list.
+ // We do this by skipping over the definition if it is at the head of the
+ // list.
+ if (*Head && (*Head)->isDef())
+ Head = &(*Head)->Contents.Reg.Next;
+
+ Contents.Reg.Next = *Head;
+ if (Contents.Reg.Next) {
+ assert(getReg() == Contents.Reg.Next->getReg() &&
+ "Different regs on the same list!");
+ Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+ }
+
+ Contents.Reg.Prev = Head;
+ *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+ assert(isOnRegUseList() && "Reg operand is not on a use list");
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *NextOp = Contents.Reg.Next;
+ *Contents.Reg.Prev = NextOp;
+ if (NextOp) {
+ assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+ NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+ }
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ RemoveRegOperandFromRegInfo();
+ SmallContents.RegNo = Reg;
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && getParent() && getParent()->getParent() &&
+ getParent()->getParent()->getParent())
+ RemoveRegOperandFromRegInfo();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ // If this operand is already a register operand, use setReg to update the
+ // register's use/def lists.
+ if (isReg()) {
+ assert(!isEarlyClobber());
+ setReg(Reg);
+ } else {
+ // Otherwise, change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ }
+
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ default: llvm_unreachable("Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress();
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ OS << PrintReg(getReg(), TRI, getSubReg());
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isEarlyClobber()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill() || isDead() || isUndef()) {
+ if (NeedComma) OS << ',';
+ if (isKill()) OS << "kill";
+ if (isDead()) OS << "dead";
+ if (isUndef()) {
+ if (isKill() || isDead())
+ OS << ',';
+ OS << "undef";
+ }
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy())
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ else
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<MCSym=" << *getMCSymbol() << '>';
+ break;
+ default:
+ llvm_unreachable("Unrecognized operand type");
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getValue());
+ ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+ ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+ assert((MMO.isLoad() || MMO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MMO.isVolatile())
+ OS << "Volatile ";
+
+ if (MMO.isLoad())
+ OS << "LD";
+ if (MMO.isStore())
+ OS << "ST";
+ OS << MMO.getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (!MMO.getValue())
+ OS << "<unknown>";
+ else
+ WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (MMO.getBaseAlignment() != MMO.getAlignment())
+ OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+ if (MMO.getOffset() != 0)
+ OS << "+" << MMO.getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+ MMO.getBaseAlignment() != MMO.getSize())
+ OS << "(align=" << MMO.getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (MMO.isNonTemporal())
+ OS << "(nontemporal)";
+
+ return OS;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// MCID NULL and no operands.
+MachineInstr::MachineInstr()
+ : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0),
+ Parent(0) {
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (MCID->ImplicitDefs)
+ for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID->ImplicitUses)
+ for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0) {
+ if (!NoImp)
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
+ bool NoImp)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ if (!NoImp)
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+ const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basic block
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ Parent(0), debugLoc(MI.getDebugLoc()) {
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MI.getOperand(i));
+ NumImplicitOps = MI.NumImplicitOps;
+
+ // Copy all the flags.
+ Flags = MI.Flags;
+
+ // Set parent to null.
+ Parent = 0;
+
+ LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+ assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+ "Reg operand def/use list corrupted");
+ }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(&RegInfo);
+ }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ assert((isImpReg || !OperandsComplete()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *RegInfo = getRegInfo();
+
+ // If we are adding the operand to the end of the list, our job is simpler.
+ // This is true most of the time, so this is a reasonable optimization.
+ if (isImpReg || NumImplicitOps == 0) {
+ // We can only do this optimization if we know that the operand list won't
+ // reallocate.
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+ Operands.push_back(Op);
+
+ // Set the parent of the operand.
+ Operands.back().ParentMI = this;
+
+ // If the operand is a register, update the operand's use list.
+ if (Op.isReg()) {
+ Operands.back().AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such
+ unsigned OpNo = Operands.size() - 1;
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, we have to insert a real operand before any implicit ones.
+ unsigned OpNo = Operands.size()-NumImplicitOps;
+
+ // If this instruction isn't embedded into a function, then we don't need to
+ // update any operand lists.
+ if (RegInfo == 0) {
+ // Simple insertion, no reginfo update needed for other register operands.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Do explicitly set the reginfo for this operand though, to ensure the
+ // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(0);
+ // If the register operand is flagged as early, mark the operand as such
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ } else if (Operands.size()+1 <= Operands.capacity()) {
+ // Otherwise, we have to remove register operands from their register use
+ // list, add the operand, then add the register operands back to their use
+ // list. This also must handle the case when the operand list reallocates
+ // to somewhere else.
+
+ // If insertion of this operand won't cause reallocation of the operand
+ // list, just remove the implicit operands, add the operand, then re-add all
+ // the rest of the operands.
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+
+ // Add the operand. If it is a register, add it to the reg list.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ if (Operands[OpNo].isReg()) {
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ // Re-add all the implicit ops.
+ for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ } else {
+ // Otherwise, we will be reallocating the operand list. Remove all reg
+ // operands from their list, then re-add them after the operand list is
+ // reallocated.
+ RemoveRegOperandsFromUseLists();
+
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Re-add all the operands.
+ AddRegOperandsToUseLists(*RegInfo);
+
+ // If the register operand is flagged as early, mark the operand as such
+ if (Operands[OpNo].isReg()
+ && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < Operands.size() && "Invalid operand number");
+
+ // Special case removing the last one.
+ if (OpNo == Operands.size()-1) {
+ // If needed, remove from the reg def/use list.
+ if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+ Operands.back().RemoveRegOperandFromRegInfo();
+
+ Operands.pop_back();
+ return;
+ }
+
+ // Otherwise, we are removing an interior operand. If we have reginfo to
+ // update, remove all operands that will be shifted down from their reg lists,
+ // move everything down, then re-add them.
+ MachineRegisterInfo *RegInfo = getRegInfo();
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+ }
+
+ Operands.erase(Operands.begin()+OpNo);
+
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ mmo_iterator OldMemRefsEnd = MemRefsEnd;
+
+ size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+ mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
+
+ std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+
+ MemRefs = NewMemRefs;
+ MemRefsEnd = NewMemRefsEnd;
+}
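+
+// Editorial sketch, not part of the upstream file: attaching a memory
+// reference describing a 4-byte, 4-byte-aligned spill-slot load, assuming a
+// frame index FI, MachineFunction &MF and MachineInstr *MI:
+//
+//   MachineMemOperand *MMO =
+//     MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+//                             MachineMemOperand::MOLoad, 4, 4);
+//   MI->addMemOperand(MF, MMO);
+//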
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, the machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
+ return true;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // Broken: we have all the operands of this instruction!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register, or -1 if it is not found. It further tightens
+/// the search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg())
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
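+// Worked example (editorial; register names and sub-index are assumed): for a
+// partial redefinition such as
+//   %vreg1:sub_lo<def> = COPY %vreg2
+// readsWritesVirtualRegister(vreg1) returns (true, true): the sub-register def
+// writes vreg1, and since there is no full def the untouched lanes count as a
+// read as well.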
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (isInlineAsm()) {
+ assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand index that corresponds to this index.
+ unsigned DefNo = 0;
+ unsigned DefPart = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i < e; ) {
+ const MachineOperand &FMO = getOperand(i);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!FMO.isImm())
+ return false;
+ // Skip over this def.
+ unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+ unsigned PrevDef = i + 1;
+ i = PrevDef + NumOps;
+ if (i > DefOpIdx) {
+ DefPart = DefOpIdx - PrevDef;
+ break;
+ }
+ ++DefNo;
+ }
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1 + DefPart;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+ const MCInstrDesc &MCID = getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i;
+ return true;
+ }
+ }
+ return false;
+}
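+// For orientation (editorial note; layout as inferred from the loops above):
+// an INLINEASM MachineInstr is laid out as
+//   INLINEASM <asm string>, <extra-info imm>,
+//             <flag imm>, <reg ops...>, <flag imm>, <reg ops...>, ...
+// i.e. each operand group is one immediate flag followed by
+// InlineAsm::getNumOperandRegisters(flag) register operands, with the tied-to
+// relation encoded in the flag, which is what this decoding walks.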
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+ if (isInlineAsm()) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+
+ // Find the flag operand corresponding to UseOpIdx
+ unsigned FlagIdx, NumOps=0;
+ for (FlagIdx = InlineAsm::MIOp_FirstOperand;
+ FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ // After the normal asm operands there may be additional imp-def regs.
+ if (!UFMO.isImm())
+ return false;
+ NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
+ assert(NumOps < getNumOperands() && "Invalid inline asm flag");
+ if (UseOpIdx < FlagIdx+NumOps+1)
+ break;
+ }
+ if (FlagIdx >= UseOpIdx)
+ return false;
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
+ // Remember to adjust the index. First operand is asm string, second is
+ // the HasSideEffects and AlignStack bits, then there is a flag for each.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
+ return true;
+ }
+ return false;
+ }
+
+ const MCInstrDesc &MCID = getDesc();
+ if (UseOpIdx >= MCID.getNumOperands())
+ return false;
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
+ if (DefIdx == -1)
+ return false;
+ if (DefOpIdx)
+ *DefOpIdx = (unsigned)DefIdx;
+ return true;
+}
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ // Predicate operands must be the last operands.
+ addOperand(MI->getOperand(i));
+ }
+ }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ if (MCID->mayStore() || MCID->isCall()) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isLabel() || isDebugValue() ||
+ MCID->isTerminator() || hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended
+ // destination. The check for isInvariantLoad gives the target a chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (MCID->mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // the end of the block, or if the load is volatile, we can't move it.
+ return !SawStore && !hasVolatileMemoryRef();
+
+ return true;
+}
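+// Usage sketch (editorial, illustrative): callers scan from the instruction's
+// current position toward the intended destination and thread SawStore through
+// each query, e.g.
+//   bool SawStore = false;
+//   for (/* each instruction between source and destination */)
+//     if (!MI->isSafeToMove(TII, AA, SawStore))
+//       break;
+// so any intervening store or call pins every later non-invariant load.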
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!TII->isTriviallyReMaterializable(this, AA) ||
+ !isSafeToMove(TII, AA, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!MCID->mayStore() &&
+ !MCID->mayLoad() &&
+ !MCID->isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for volatile references.
+ for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+ if ((*I)->isVolatile())
+ return true;
+
+ return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!MCID->mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (mmo_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I) {
+ if ((*I)->isVolatile()) return false;
+ if ((*I)->isStore()) return false;
+
+ if (const Value *V = (*I)->getValue()) {
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV->isConstant(MFI))
+ continue;
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
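+// Example (editorial; register names assumed): for
+//   %vreg7 = PHI %vreg2, <BB#1>, %vreg2, <BB#3>
+// isConstantValuePHI() returns vreg2, while a PHI merging different source
+// registers returns 0.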
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (getDesc().hasUnmodeledSideEffects())
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MO);
+ }
+}
+
+void MachineInstr::dump() const {
+ dbgs() << " " << *this;
+}
+
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
+ }
+
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ OS << getDesc().getName();
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm()) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
+ OS << "pred:";
+ if (MCOI.isOptionalDef())
+ OS << "opt:";
+ }
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ const MDNode *MD = MO.getMetadata();
+ if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
+ OS << "!\"" << MDS->getString() << '\"';
+ else
+ MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm]"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem]"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " [tiedto:$" << TiedTo << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ if (Flags) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+ }
+
+ if (!memoperands_empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ }
+
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << PrintReg(VirtRegs[j]);
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
+ // Print debug location information.
+ if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " dbg:";
+ printDebugLoc(debugLoc, MF, OS);
+ }
+
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->getSubRegisters(IncomingReg) &&
+ RegInfo->getSuperRegisters(Reg) &&
+ RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ bool Dead = true;
+ for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+ E = UsedRegs.end(); I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ unsigned Hash = MI->getOpcode() * 37;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ uint64_t Key = (uint64_t)MO.getType() << 32;
+ switch (MO.getType()) {
+ default: break;
+ case MachineOperand::MO_Register:
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+ Key |= MO.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ Key |= MO.getImm();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ Key |= MO.getIndex();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ break;
+ case MachineOperand::MO_MCSymbol:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ break;
+ }
+ Key += ~(Key << 32);
+ Key ^= (Key >> 22);
+ Key += ~(Key << 13);
+ Key ^= (Key >> 8);
+ Key += (Key << 3);
+ Key ^= (Key >> 15);
+ Key += ~(Key << 27);
+ Key ^= (Key >> 31);
+ Hash = (unsigned)Key + Hash * 37;
+ }
+ return Hash;
+}
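+// Editorial note: virtual register defs are skipped in the hash above so that
+// two instructions differing only in the vreg they define hash identically;
+// this presumably pairs with isIdenticalTo(IgnoreVRegDefs) when the trait is
+// used as a DenseMap key for machine-level CSE.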
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 000000000000..722ceb202439
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1211 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ BitVector AllocatableSet;
+
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() :
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Machine Instruction LICM"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// CandidateInfo - Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+ /// invariants out to the preheader.
+ void HoistRegionPostRA();
+
+ /// HoistPostRA - When an instruction is found to only use loop invariant
+ /// operands and is safe to hoist, this function is called to do the
+ /// dirty work.
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+ /// gather register def and frame object update information.
+ void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates);
+
+ /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+ /// current loop.
+ void AddToLiveIns(unsigned Reg);
+
+ /// IsLICMCandidate - Returns true if the instruction may be a suitable
+ /// candidate for LICM; e.g., if the instruction is a call, then it's
+ /// obviously not safe to hoist it.
+ bool IsLICMCandidate(MachineInstr &I);
+
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// HasAnyPHIUse - Return true if the specified register is used by any
+ /// phi node.
+ bool HasAnyPHIUse(unsigned Reg) const;
+
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop; return true if the target considers
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+ /// check if hoisting an instruction with the given per-register-class cost
+ /// can cause high register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
+
+ /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
+ /// the load itself could be hoisted. Return the unfolded and hoistable
+ /// load, or null if the load couldn't be unfolded or if it wouldn't
+ /// be hoistable.
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
+ /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+ /// the preheader that computes the same value. If one is found, replace all
+ /// uses with the definition of the existing instruction rather than hoisting
+ /// the instruction to the preheader.
+ bool EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+ /// Hoist - When an instruction is found to only use loop invariant operands
+ /// and is safe to hoist, this function is called to do the dirty work.
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ /// InitCSEMap - Initialize the CSE map with instructions that are in the
+ /// current loop preheader that may become duplicates of instructions that
+ /// are hoisted out of the loop.
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+ return new MachineLICM(PreRegAlloc);
+}
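+// Usage sketch (editorial; illustrative pipeline, not from this patch): the
+// common codegen setup typically schedules the pass in both modes, e.g.
+//   PM.add(createMachineLICMPass(true));   // pre-RA: hoist vreg computations
+//   // ... register allocation ...
+//   PM.add(createMachineLICMPass(false));  // post-RA: hoist invariant reloads
+// with the boolean selecting between HoistRegion and HoistRegionPostRA below.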
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
+
+ Changed = FirstInLoop = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
+ TRI = TM->getRegisterInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ }
+
+ // Get our Loop information...
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistRegion(N, true);
+ CSEMap.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// InstructionStoresToFI - Return true if instruction stores to the
+/// specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end(); o != oe; ++o) {
+ if (!(*o)->isStore() || !(*o)->getValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ unsigned *PhysRegDefs,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVector<CandidateInfo, 32> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && PhysRegDefs[Reg])
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe.
+ if (++PhysRegDefs[Reg] > 1)
+ // The register defined by MI is also defined by another instruction in
+ // the loop, so it cannot be a LICM candidate.
+ RuledOut = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ if (++PhysRegDefs[*AS] > 1)
+ RuledOut = true;
+ }
+
+ // Only consider reloads for now and remats which do not have register
+ // operands. FIXME: Consider unfolding load-folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+ unsigned NumRegs = TRI->getNumRegs();
+ unsigned *PhysRegDefs = new unsigned[NumRegs];
+ std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ // Conservatively treat live-ins as an external def.
+ // FIXME: That means a reload that is reused in successor block(s) will not
+ // be LICM'ed.
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ ++PhysRegDefs[Reg];
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ ++PhysRegDefs[*AS];
+ }
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+ if (Candidates[i].FI != INT_MIN &&
+ StoredFIs.count(Candidates[i].FI))
+ continue;
+
+ if (PhysRegDefs[Candidates[i].Def] == 1) {
+ bool Safe = true;
+ MachineInstr *MI = Candidates[i].MI;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ if (PhysRegDefs[MO.getReg()]) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidates[i].Def);
+ }
+ }
+
+ delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *BB = Blocks[i];
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// HoistPostRA - When an instruction is found to only use loop invariant
+/// operands and is safe to hoist, this function is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+ // Add register to livein list to all the BBs in the current loop since a
+ // loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
+ assert(N != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ if (IsHeader) {
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() < 25) {
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+ }
+
+ BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (MO.isDef())
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ }
+}
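+// Worked example (editorial; a register class cost of 1 per value is assumed):
+// a preheader containing
+//   %vreg1 = LOAD ...                ; new def            -> pressure +1
+//   %vreg2 = ADD %vreg1<kill>, ...   ; def +1, kill -1    -> net +0
+// ends with an estimate of 1 for that class, i.e. only %vreg2 is counted as
+// live out into the loop.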
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
+ }
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM; e.g., if the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+ return false;
+
+ return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->def_empty(Reg))
+ return false;
+ if (AllocatableSet.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI->def_empty(AliasReg))
+ return false;
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
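+// Example (editorial): a pure vreg computation such as
+//   %vreg3 = ADD %vreg1, %vreg2
+// is loop invariant here when both sources are defined outside CurLoop,
+// whereas using a physical register that has defs in the function (or is
+// allocatable) disqualifies the instruction per the checks above.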
+
+
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isPHI())
+ return true;
+ // Look past copies as well.
+ if (UseMI->isCopy()) {
+ unsigned Def = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def) &&
+ HasAnyPHIUse(Def))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop; return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from the loop header to the current BB
+/// and check whether hoisting an instruction with the given cost map can
+/// cause high register pressure.
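+/// The cost map is keyed by register class id and holds the pressure delta
+/// the candidate would add; each BackTrace entry records the per-class
+/// register pressure of one block on the path from the loop header to the
+/// current block, so a single block that would exceed its limit is enough to
+/// reject hoisting.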
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + CI->second >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
+ }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // If the instruction is cheap, only hoist it if it is re-materializable;
+ // otherwise LICM will increase register pressure for little benefit.
+ // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
+ // Hoisting these tends to help performance in low register pressure
+ // situations. The trade-off is that it may cause a spill in high pressure
+ // situations, which ends up adding a store in the loop preheader, but the
+ // reload is no more expensive. A side benefit is that these loads are
+ // frequently CSE'ed.
+ if (IsCheapInstruction(MI)) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
+ return false;
+ } else {
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situations, we can be more aggressive about
+ // hoisting. Also, favor hoisting long-latency instructions even in
+ // moderately high pressure situations.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Visit BBs from the header to the current BB; if hoisting this instruction
+ // doesn't cause high register pressure, it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost)) {
+ ++NumLowRP;
+ return true;
+ }
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be rematerialized.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA))
+ return false;
+ }
+
+ // If the result(s) of this instruction are used by PHIs outside of the loop,
+ // don't hoist it, because that would introduce an extra copy.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (HasAnyPHIUse(MO.getReg()))
+ return false;
+ }
+
+ return true;
+}
+
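+/// ExtractHoistableLoad - If MI is not itself a simple load but reads
+/// invariant memory through a folded memory operand, unfold that operand into
+/// a separate load of a new virtual register followed by the register-form
+/// instruction, and return the load as the new hoisting candidate. Returns
+/// null if unfolding is impossible or the unfolded load is not worth hoisting.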
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->getDesc().canFoldAsLoad())
+ return 0;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isInvariantLoad(AA))
+ return 0;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return 0;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = MRI->createVirtualRegister(RC);
+
+ MachineFunction &MF = *MI->getParent()->getParent();
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success =
+ TII->unfoldMemoryOperand(MF, MI, Reg,
+ /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+ NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MI, NewMIs[0]);
+ MBB->insert(MI, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return 0;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
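+/// InitCSEMap - Record the instructions already present in BB (the loop
+/// preheader), keyed by opcode, so that subsequently hoisted instructions can
+/// be CSE'd against them.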
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+ return PrevMI;
+ }
+ return 0;
+}
+
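+/// EliminateCSE - Given a hoisted instruction MI and the CSE map entry for
+/// its opcode, look for an existing instruction that produces the same value;
+/// if found, rewrite MI's virtual register defs to use the existing
+/// instruction's and erase MI. Returns true if MI was eliminated.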
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ MRI->clearKillFlags(Dup->getOperand(i).getReg());
+ }
+ }
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// Hoist - When an instruction is found to use only loop-invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+ // Now move the instruction to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (Preheader->getBasicBlock())
+ dbgs() << " to MachineBasicBlock "
+ << Preheader->getName();
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from MachineBasicBlock "
+ << MI->getParent()->getName();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for an opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+ }
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 000000000000..189cb2ba5d1d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,83 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace llvm {
+#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLB);
+#undef MLB
+#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLIB);
+#undef MLIB
+}
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB != Begin) {
+ MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB == Begin) break;
+ PriorMBB = prior(MachineFunction::iterator(TopMBB));
+ }
+ }
+ return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB != prior(End)) {
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+ if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+ }
+ }
+ return BotMBB;
+}
+
+void MachineLoop::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 000000000000..17fe67f65045
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<SlotIndexes>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Indexes = &getAnalysis<SlotIndexes>();
+ return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+ DeleteContainerSeconds(Cache);
+ Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+ MachineLoopRange *&Range = Cache[Loop];
+ if (!Range)
+ Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+ return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
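+/// Intervals records the [start, stop) SlotIndex range of every basic block
+/// in the loop, and Area accumulates their total distance as a rough measure
+/// of loop size (used by the byAreaDesc comparator).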
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+ MachineLoopRange::Allocator &alloc,
+ SlotIndexes &Indexes)
+ : Loop(loop), Intervals(alloc), Area(0) {
+ // Compute loop coverage.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+ Intervals.insert(Range.first, Range.second, 1u);
+ Area += Range.first.distance(Range.second);
+ }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+ Map::const_iterator I = Intervals.find(Start);
+ return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+ return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ unsigned na = a->getNumber();
+ unsigned nb = b->getNumber();
+ if (na < nb)
+ return -1;
+ if (na > nb)
+ return 1;
+ return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ if (a->getArea() != b->getArea())
+ return a->getArea() > b->getArea() ? -1 : 1;
+ return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+ OS << "Loop#" << getNumber() << " =";
+ for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+ OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+ MLR.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 000000000000..fadc594efcb2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,567 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the pass registration stuff necessary to use MachineModuleInfo.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+ MMIAddrLabelMap *Map;
+public:
+ MMIAddrLabelMapCallbackPtr() : Map(0) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *V2);
+};
+
+class MMIAddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// Symbols - The symbols for the label. This is a pointer union that is
+ /// either one symbol (the common case) or a list of symbols.
+ PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// BBCallbacks - Callbacks for the BasicBlocks that we have entries for. We
+ /// use these so we get notified if a block is deleted or RAUW'd.
+ std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+ /// whose corresponding BasicBlock got deleted. These symbols need to be
+ /// emitted at some point in the file, so AsmPrinter emits them after the
+ /// function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DeletedAddrLabelsNeedingEmission;
+public:
+
+ MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+
+ // Deallocate any of the 'list of symbols' case.
+ for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+ I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+ if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+ delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+ }
+
+ MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+ std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.isNull()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ if (Entry.Symbols.is<MCSymbol*>())
+ return Entry.Symbols.get<MCSymbol*>();
+ return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.push_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size()-1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Result = Context.CreateTempSymbol();
+ Entry.Symbols = Result;
+ return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ std::vector<MCSymbol*> Result;
+
+ // If we already had an entry for this block, just return it.
+ if (Entry.Symbols.isNull())
+ Result.push_back(getAddrLabelSymbol(BB));
+ else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+ Result.push_back(Sym);
+ else
+ Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+ return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = 0; // Clear the callback.
+
+ assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+
+ // Handle both the single and the multiple symbols cases.
+ if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ } else {
+ std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+ MCSymbol *Sym = (*Syms)[i];
+ if (Sym->isDefined()) continue; // Ignore already emitted labels.
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from
+ // 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+
+ // The entry is deleted, free the memory associated with the symbol list.
+ delete Syms;
+ }
+}
+
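+/// UpdateForRAUWBlock - Old is being replaced by New; transfer Old's
+/// address-taken label symbols to New, merging them with any symbols New
+/// already has.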
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.isNull()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = OldEntry; // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = 0; // Update the callback.
+
+ // Otherwise, we need to add the old symbol to the new block's set. If it is
+ // just a single entry, upgrade it to a symbol list.
+ if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+ SymList->push_back(PrevSym);
+ NewEntry.Symbols = SymList;
+ }
+
+ std::vector<MCSymbol*> *SymList =
+ NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+ // If the old entry was a single symbol, add it.
+ if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+ SymList->push_back(Sym);
+ return;
+ }
+
+ // Otherwise, concatenate the list.
+ std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+ SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+ delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
+ ObjFileMMI(0),
+ CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
+ AddrLabelSymbols = 0;
+ TheModule = 0;
+}
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
+ assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
+ "should always be explicitly constructed by LLVMTargetMachine");
+ abort();
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+ delete ObjFileMMI;
+
+ // FIXME: Why isn't doFinalization being called??
+ //assert(AddrLabelSymbols == 0 && "doFinalization not called");
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ assert(AddrLabelSymbols == 0 && "Improperly initialized");
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = 0;
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for functions listed in llvm.used.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+ // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+ // UsedFunctions.
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const Function *F =
+ dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+ UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, so return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (AddrLabelSymbols == 0)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but those blocks were
+/// deleted, return their symbols now so we can emit them. This prevents
+/// emitting a reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (AddrLabelSymbols == 0) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding, go
+ // ahead and add it at the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<const GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<const GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = 0;
+
+ // Special case: we *should* emit LPs with a null LP MBB. This indicates the
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j, --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
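+/// For example, starting from empty tables, a filter {2, 3} gets id -1 and
+/// leaves FilterIds = [2, 3, 0] (0 is the terminator) and FilterEnds = [2];
+/// a later filter {3} matches the tail of that entry and gets id -2 without
+/// growing FilterIds.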
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return a unique index for the current personality
+/// function. The NULL/first personality function should always get index
+/// zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan the landing pads. If there is at least one non-NULL personality, use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is
+ // at index zero.
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..5ab56c09f5f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+void MachineModuleInfoELF::Anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+ const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+ const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+ MachineModuleInfoImpl::StubValueTy>&Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 000000000000..9f4ef1287803
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 000000000000..4b3e64c25f60
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,229 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedPhysRegs.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ assert(!PhysRegUseDefLists[i] &&
+ "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ VRegInfo[Reg].first = RC;
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+ if (!NewRC)
+ return 0;
+ if (NewRC != OldRC)
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+ void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+
+ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
+ // The vector reallocated, handle this now.
+ HandleVRegListReallocation();
+ return Reg;
+}
+
+/// HandleVRegListReallocation - We just added a virtual register to the
+/// VRegInfo info list and it reallocated. Update the use/def lists info
+/// pointers.
+void MachineRegisterInfo::HandleVRegListReallocation() {
+ // The back pointers for the vreg lists point into the previous vector.
+ // Update them to point to their correct slots.
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ MachineOperand *List = VRegInfo[Reg].second;
+ if (!List) continue;
+ // Update the back-pointer to be accurate once more.
+ List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+ }
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ if (!def_empty(Reg))
+ return &*def_begin(Reg);
+ return 0;
+}
+
+bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
+ use_iterator UI = use_begin(RegNo);
+ if (UI == use_end())
+ return false;
+ return ++UI == use_end();
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+ UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
+ for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+ if (*I == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
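+/// closePhysRegsUsed - For each physical register marked used, also mark its
+/// sub-registers (those with a higher register number) as used.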
+void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
+ for (int i = UsedPhysRegs.find_first(); i >= 0;
+ i = UsedPhysRegs.find_next(i))
+ for (const unsigned *SS = TRI.getSubRegisters(i);
+ unsigned SubReg = *SS; ++SS)
+ if (SubReg > unsigned(i))
+ UsedPhysRegs.set(SubReg);
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 000000000000..84d6df25397c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,372 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. V is the register whose class is used when creating new PHI
+/// nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
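+/// LookForIdenticalPHI - If one of the PHIs at the start of BB merges exactly
+/// the given (predecessor, register) values, return the register it defines;
+/// otherwise return 0.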
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->front();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which
+/// defines a value of the given register class at the start of the specified
+/// basic block. It returns the newly created instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
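+/// findCorrespondingPred - Given a PHI instruction and a pointer to one of its
+/// register operands, return the predecessor basic block paired with that
+/// operand.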
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+ return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// MachinePHIiter - Iterator for PHI operands. This is used for the
+/// PHI_iterator in the SSAUpdaterImpl template.
+namespace {
+ class MachinePHIiter {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit MachinePHIiter(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ MachinePHIiter(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ MachinePHIiter &operator++() { idx += 2; return *this; }
+ bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+}
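+
+// For illustration only: walking a PHI's incoming (value, block) pairs with
+// the iterator above might look like this (PHI being some MachineInstr*):
+//
+//   for (MachinePHIiter I(PHI), E(PHI, true); I != E; ++I)
+//     dbgs() << I.getIncomingValue() << " from BB#"
+//            << I.getIncomingBlock()->getNumber() << '\n';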
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ typedef MachinePHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ PHI->addOperand(MachineOperand::CreateReg(Val, false));
+ PHI->addOperand(MachineOperand::CreateMBB(Pred));
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return 0;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 000000000000..916dff70a41e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,610 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *LI;
+ AliasAnalysis *AA;
+ BitVector AllocatableSet; // Which physregs are allocatable?
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ }
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
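+/// PerformTrivialForwardCoalescing - If MI is a COPY between virtual registers
+/// of the same register class, whose source has a single non-debug use and is
+/// not itself defined by a copy, replace all uses of the destination register
+/// with the source register and erase the copy. With hypothetical vregs:
+///
+///   %vreg1<def> = ...
+///   %vreg2<def> = COPY %vreg1   <-- erased; uses of %vreg2 now use %vreg1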
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the vreg. Dwarf generator handles this although the
+ // user might not get the right info at runtime.
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ AllocatableSet = TRI->getAllocatableSet(MF);
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
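+/// ProcessBlock - Walk MBB bottom-up, first trying to coalesce trivial copies
+/// and then trying to sink each remaining instruction into one of MBB's
+/// successors. Returns true if the block was changed.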
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI->isDebugValue())
+ continue;
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
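+/// isWorthBreakingCriticalEdge - Decide whether splitting the critical edge
+/// From->To in order to sink MI is likely to pay off. An edge already
+/// considered during this pass is always worth splitting again; otherwise only
+/// non-trivial (not as-cheap-as-a-move) instructions, or cheap ones whose
+/// source-operand definitions might then be sunk as well, justify a split.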
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ return true;
+
+  // MI is cheap, so we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
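+/// SplitCriticalEdge - Split the critical edge FromBB->ToBB so that MI can be
+/// sunk into the new block, if doing so is both legal and profitable; return
+/// the new block, or 0 if the edge was not split.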
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
+ // Avoid breaking back edge. From == To means backedge for single BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return 0;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+ }
+
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
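+/// AvoidsSinking - Return true if MI is an instruction this pass never sinks:
+/// INSERT_SUBREG, SUBREG_TO_REG and REG_SEQUENCE are kept close to their
+/// sources to make them easier to coalesce.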
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ MachineBasicBlock *ParentBlock = MI->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+
+ bool BreakPHIEdge = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->def_empty(Reg))
+ return false;
+
+ if (AllocatableSet.test(Reg))
+ return false;
+
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI->def_empty(AliasReg))
+ return false;
+
+ if (AllocatableSet.test(AliasReg))
+ return false;
+ }
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return false;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ BreakPHIEdge, LocalUse))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+ E = ParentBlock->succ_end(); SI != E; ++SI) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return false;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+ }
+ }
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+  // It's not safe to sink instructions into an EH landing pad, since control
+  // flow into a landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MI->getParent() == SuccToSinkTo)
+ return false;
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, sinking would introduce a
+  // computation on a path where it does not already exist. We could split
+  // the critical edge,
+ // but for now we just punt.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool store = true;
+ if (!MI->isSafeToMove(TII, AA, store)) {
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a loop.
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ BreakPHIEdge = false;
+ }
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunk into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
+  // Determine where to insert the instruction. Skip over any PHI nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MI->clearKillInfo();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 000000000000..7a55852a1315
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1218 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
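+//
+// For example, an illustrative invocation (the input file name is arbitrary):
+//   llc -verify-machineinstrs foo.ll
+//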
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
+
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, Banner);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
+}
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = NULL;
+ LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getName()
+ << " " << (void*)MBB
+ << " (BB#" << MBB->getNumber() << ")";
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = TRI->getReservedRegs(*MF);
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+ regsReserved.set(*Sub);
+ }
+ }
+ markReachable(&MF->front());
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())))
+ report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ regsLive.insert(*R);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MCID.mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MCID.mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the arm back end.
+ if (MO->isReg() &&
+ !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MI->isDebugValue()) {
+ // Liveness checks are not valid for debug values.
+ } else if (MO->isUse() && !MO->isUndef()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical",
+ MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned sr = Reg;
+ if (SubIdx) {
+ unsigned s = TRI->getSubReg(Reg, SubIdx);
+ if (!s) {
+ report("Invalid subregister index for physical register",
+ MO, MONum);
+ return;
+ }
+ sr = s;
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (!DRC->contains(sr)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(sr) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ RC = SRC;
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
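
calcRegsPassed above (and calcRegsRequired below) is an ordinary worklist fixpoint: a block's set is pushed along CFG edges and a neighbor is revisited whenever its set grows. A rough standalone sketch of the forward direction, using plain STL containers instead of the verifier's BBInfo bookkeeping (block ids, container choices, and names here are illustrative only, not the LLVM code):

#include <map>
#include <queue>
#include <set>
#include <vector>

// Propagate each block's live-out register set to every block reachable from
// it; Passed[B] ends up holding every register that can enter B live.
void propagatePassedSets(const std::map<int, std::vector<int>> &Succs,
                         const std::map<int, std::set<unsigned>> &LiveOut,
                         std::map<int, std::set<unsigned>> &Passed) {
  std::queue<int> Todo;
  // Seed: each successor receives its predecessors' live-out registers.
  for (const auto &BS : Succs) {
    auto LO = LiveOut.find(BS.first);
    if (LO == LiveOut.end())
      continue;
    for (int S : BS.second) {
      auto Before = Passed[S].size();
      Passed[S].insert(LO->second.begin(), LO->second.end());
      if (Passed[S].size() != Before)
        Todo.push(S);
    }
  }
  // Iterate: keep pushing grown sets forward until nothing changes.
  while (!Todo.empty()) {
    int B = Todo.front();
    Todo.pop();
    auto SI = Succs.find(B);
    if (SI == Succs.end())
      continue;
    for (int S : SI->second) {
      if (S == B)
        continue; // merging a set into itself cannot change it
      auto Before = Passed[S].size();
      Passed[S].insert(Passed[B].begin(), Passed[B].end());
      if (Passed[S].size() != Before)
        Todo.push(S);
    }
  }
}

The same shape, walking predecessor edges instead of successor edges, gives calcRegsRequired.
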
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+ }
+
+ // Now check liveness info if available
+ if (LiveVars || LiveInts)
+ calcRegsRequired();
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+ LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+ const LiveInterval &LI = *LVI->second;
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->use_empty(LI.reg))
+ continue;
+
+ // Physical registers have much weirdness going on, mostly from coalescing.
+ // We should probably fix it, but for now just ignore them.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ continue;
+
+ assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I) {
+ VNInfo *VNI = *I;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ if (!VNI->isUnused()) {
+ report("Valno not live at def and not marked unused", MF);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (VNI->isUnused())
+ continue;
+
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+ continue;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ continue;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber()
+ << " in " << LI << '\n';
+ }
+ } else {
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ bool isEarlyClobber = false;
+ if (MI) {
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+ MOI->isEarlyClobber()) {
+ isEarlyClobber = true;
+ break;
+ }
+ }
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isUse()) {
+ report("Early clobber def must be at a USE slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ } else if (!VNI->def.isDef()) {
+ report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ }
+ }
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF);
+ I->print(*OS);
+ *OS << " has a valno not in " << LI << '\n';
+ }
+
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ !MI->readsVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ // FIXME: Should we check for each of these?
+ bool hasDeadDef = false;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
+ hasDeadDef = true;
+ break;
+ }
+ }
+
+ if (!hasDeadDef) {
+ report("Instruction killing live segment neither defines nor reads "
+ "register", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ continue;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+ const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
+ if (PVNI && !PVNI->hasPHIKill()) {
+ report("Value live out of predecessor doesn't have PHIKill", MF);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << " doesn't have PHIKill, but Valno #" << VNI->id
+ << " is PHIDef and defined at the beginning of BB#"
+ << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
+ << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << PEnd << " in " << LI << '\n';
+ continue;
+ }
+
+ if (PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+ }
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ *OS << NumComp << " components in " << LI << '\n';
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
+ }
+}
+
diff --git a/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 000000000000..cf05275d7a31
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+// ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+ BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+ BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+ BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+ BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+ BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+ uint8_t fill /* 0 */) {
+ BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+ BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
+ BO->emitSLEB128Bytes(Value);
+}
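
emitULEB128Bytes and emitSLEB128Bytes just forward to BinaryObject, so the actual variable-length encoding is not visible here. For reference, a minimal standalone sketch of the standard LEB128 encodings (DWARF-style; this is not the BinaryObject implementation, and the signed variant assumes the usual arithmetic right shift for negative values):

#include <cstdint>
#include <vector>

// Unsigned LEB128: emit 7 bits per byte, high bit set while more bytes follow.
void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (Value != 0);
}

// Signed LEB128: stop once the remaining bits are pure sign extension.
void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // assumed arithmetic shift, preserving the sign
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

For example, encodeULEB128(300, Out) appends 0xAC 0x02, and encodeSLEB128(-2, Out) appends 0x7E.
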
+
+/// emitString - This callback is invoked when a String needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitString(const std::string &String) {
+ BO->emitString(String);
+}
+
+/// getCurrentPCValue - This returns the address that the next emitted byte
+/// will be output to.
+uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// getCurrentPCOffset - Return the offset from the start of the emitted
+/// buffer that we are currently writing to.
+uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
+ return BO->getCurrentPCOffset();
+}
+
+/// addRelocation - Whenever a relocatable address is needed, it should be
+/// noted with this interface.
+void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
+ BO->addRelocation(relocation);
+}
+
+/// StartMachineBasicBlock - This should be called by the target when a new
+/// basic block is about to be emitted. This way the MCE knows where the
+/// start of the block is, and can implement getMachineBasicBlockAddress.
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+}
+
+/// getMachineBasicBlockAddress - Return the address of the specified
+/// MachineBasicBlock, only usable after the label for the MBB has been
+/// emitted.
+uintptr_t
+ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+}
+
+/// getJumpTableEntryAddress - Return the address of the jump table with index
+/// 'Index' in the function that last called initJumpTableInfo.
+uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+}
+
+/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+}
+
+/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+/// the constant pool that was last emitted with the emitConstantPool method.
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
+ assert(CPSections.size() > Index && "CP not emitted!");
+ return CPSections[Index];
+}
+
+} // end namespace llvm
+
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 000000000000..48db200c513c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 000000000000..c05be130ec61
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,190 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false)
+
+FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+ E = MRI->use_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..af65f13bf065
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,434 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in this
+    /// function. In particular, for each virtual register used by a PHI node
+    /// we count its uses, keyed by the BB the vreg is coming from. This is
+    /// used later to determine when the vreg is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV, MachineLoopInfo *MLI);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Split critical edges to help the coalescer
+ if (!DisableEdgeSplitting) {
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+ }
+ }
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(MF, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg))
+ DefMI->eraseFromParent();
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+ I != E; ++I)
+ MF.DeleteMachineInstr(I->first);
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ unsigned SrcReg = MPhi->getOperand(i).getReg();
+ const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (!DefMI || !DefMI->isImplicitDef())
+ return false;
+ }
+ return true;
+}
+
+
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void PHIElimination::LowerAtomicPHINode(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumAtomic;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ VI.NumUses++;
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+      // for each incoming block), the "def" block and instruction fields of
+      // the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path for the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // If source is defined by an implicit def, there is no need to insert a
+ // copy.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ ImpDefs.insert(DefMI);
+ continue;
+ }
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy, this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ if (!reusedIncoming && IncomingReg)
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
+
+    // Now update live variable information if we have it. Otherwise we're done.
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI (aka, the
+ // copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value is
+ // live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Also check to see if this register is in use by another PHI node which
+ // has not yet been eliminated. If so, it will be killed at an appropriate
+ // point later.
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+
+ // Okay, if we now know that the value is not live out of the block, we can
+ // add a kill marker in this block saying that it kills the incoming value!
+ if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as being the killing block, not the
+ // copy.
+ MachineBasicBlock::iterator KillInst;
+ MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+ if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+ KillInst = Term;
+
+ // Check that no other terminators use values.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = llvm::next(Term);
+ TI != opBlock.end(); ++TI) {
+ if (TI->isDebugValue())
+ continue;
+ assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+ "they are the first terminator in a block!");
+ }
+#endif
+ } else if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = Term;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg)
+ MF.DeleteMachineInstr(MPhi);
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, for each virtual register used by a PHI node we count its
+/// uses, keyed by the BB the vreg is coming from. This is used later to
+/// determine when the vreg is killed in the BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+ BBI->getOperand(i).getReg())];
+}
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ bool Changed = false;
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live in to this block
+ // anyway, we would gain nothing from splitting.
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB != &MBB &&
+ !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+ if (!MLI ||
+ !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+ MLI->isLoopHeader(&MBB))) {
+ if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ }
+ }
+ }
+ return Changed;
+}
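
As a reading aid only, the splitting condition buried in the nested ifs above can be restated as a small predicate; the boolean parameters stand in for the LiveVariables and MachineLoopInfo queries, and the last one approximates the "predecessor is in the same loop and this block is its header" backedge test:

// Illustrative restatement of the SplitPHIEdges heuristic (not LLVM API).
bool shouldSplitPHIEdge(bool IsSelfEdge, bool RegLiveInToPHIBlock,
                        bool RegLiveOutOfPred, bool EdgeIsLoopBackedge) {
  if (IsSelfEdge)
    return false;            // Nothing to gain from splitting a self edge.
  if (RegLiveInToPHIBlock)
    return false;            // Already live in; splitting gains nothing.
  if (!RegLiveOutOfPred)
    return false;            // The value dies in the predecessor anyway.
  if (EdgeIsLoopBackedge)
    return false;            // Avoid out-of-line blocks inside the loop.
  return true;               // Live out of the pred, not live in here: split.
}
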
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..10bfdcce6769
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..9ac47fb4c505
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
new file mode 100644
index 000000000000..315aedddb9ef
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -0,0 +1,73 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ createDefaultRegisterAllocator);
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&createDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ // This forces linking of the linear scan register allocator,
+ // so -regalloc=linearscan still works in clang.
+ if (Ctor == createLinearScanRegisterAllocator)
+ return createLinearScanRegisterAllocator();
+
+ if (Ctor != createDefaultRegisterAllocator)
+ return Ctor();
+
+ // When the 'default' allocator is requested, pick one based on OptLevel.
+ switch (OptLevel) {
+ case CodeGenOpt::None:
+ return createFastRegisterAllocator();
+ default:
+ return createGreedyRegisterAllocator();
+ }
+}
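
For context, a target or tool that wanted its own allocator selectable through -regalloc would register it through the same MachinePassRegistry mechanism shown by defaultRegAlloc above. The "toy" name and factory below are made up for illustration; a real factory would return the actual allocator pass rather than 0:

// Hypothetical extra allocator registration (illustration only).
static FunctionPass *createToyRegisterAllocator() { return 0; }
static RegisterRegAlloc
toyRegAlloc("toy", "toy register allocator (illustration only)",
            createToyRegisterAllocator);

With that in place, -regalloc=toy makes createRegisterAllocator pick the factory above instead of the OptLevel-based default.
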
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..c523e39bc258
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,465 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the result.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+// v1 = bitcast v0
+// v2 = bitcast v1
+// = v2
+// =>
+// v1 = bitcast v0
+// = v0
+//
+//===----------------------------------------------------------------------===//
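
To make the comparison example above concrete, the sub/cmp/bz pattern typically comes from source like the sketch below; on targets where the decrement already sets condition flags, the explicit compare generated for the == 0 test is the instruction this pass removes (illustrative C++ only, not part of the imported sources):

// Source-level analogue of the sub/cmp/bz sequence described above.
bool decrementAndTest(int &Counter) {
  --Counter;            // lowers to something like: sub r1, 1
  return Counter == 0;  // lowers to: cmp r1, 0  followed by a branch; the cmp
                        // is redundant when the sub already set the flag
}
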
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediates folded");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+ return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register and writes a single register and does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+ if (++UI == MRI->use_nodbg_end())
+ // No other uses.
+ return false;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+  // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ UI = MRI->use_nodbg_begin(SrcReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts a
+/// value across register classes), and its source is defined by another
+/// bitcast instruction B, and the register class of B's source matches the
+/// register class of A's definition, then it is legal to replace all uses of
+/// A's def with B's source. e.g.
+/// %vreg0<def> = VMOVSR %vreg1
+/// %vreg3<def> = VMOVRS %vreg0
+/// Replace all uses of vreg3 with vreg1.
+
+bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+
+ unsigned Def = 0;
+ unsigned Src = 0;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Def = Reg;
+ else if (Src)
+ // Multiple sources?
+ return false;
+ else
+ Src = Reg;
+ }
+
+ assert(Def && Src && "Malformed bitcast instruction!");
+
+ MachineInstr *DefMI = MRI->getVRegDef(Src);
+ if (!DefMI || !DefMI->getDesc().isBitcast())
+ return false;
+
+ unsigned SrcDef = 0;
+ unsigned SrcSrc = 0;
+ NumDefs = DefMI->getDesc().getNumDefs();
+ NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ SrcDef = Reg;
+ else if (SrcSrc)
+ // Multiple sources?
+ return false;
+ else
+ SrcSrc = Reg;
+ }
+
+ if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+ return false;
+
+ MRI->replaceRegWith(Def, SrcSrc);
+ MRI->clearKillFlags(SrcSrc);
+ MI->eraseFromParent();
+ ++NumBitcasts;
+ return true;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
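+/// For example (illustrative ARM-style pseudo machine code, not taken from
+/// any particular target definition):
+///   SUBS %r0, %r1, %r2    ; already sets the condition flags
+///   CMP  %r0, #0          ; redundant, can be removed
+///   Bcc  ...              ; uses the flags produced by SUBS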
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg;
+ int CmpMask, CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
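+/// For example (illustrative pseudo machine code, assuming the target's
+/// TargetInstrInfo::FoldImmediate hook handles this pattern):
+///   %vreg1 = MOVi 42
+///   %vreg2 = ADDrr %vreg0, %vreg1
+/// could be folded by the hook into:
+///   %vreg2 = ADDri %vreg0, 42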
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePeephole)
+ return false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
+ LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+
+ bool First = true;
+ MachineBasicBlock::iterator PMII;
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ LocalMIs.insert(MI);
+
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isBitcast()) {
+ if (OptimizeBitcastInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ } else if (MCID.isCompare()) {
+ if (OptimizeCmpInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ }
+
+ First = false;
+ PMII = MII;
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..c73e87733cb4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,712 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AntiDepBreaker.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
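+// For example, an (illustrative) llc invocation forcing post-RA scheduling
+// with critical-path anti-dependency breaking:
+//   llc -post-RA-scheduler -break-anti-dependencies=critical input.ll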
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ AliasAnalysis *AA;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+ CodeGenOpt::Level OptLevel;
+
+ public:
+ static char ID;
+ PostRAScheduler(CodeGenOpt::Level ol) :
+ MachineFunctionPass(ID), OptLevel(ol) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// Topo - A topological ordering for SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
+
+ /// StartBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void Schedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// FinishBlock - Clean up register live-range state.
+ ///
+ void FinishBlock();
+
+ /// FixupKills - Fix register kill flags that have been made
+ /// invalid due to scheduling
+ ///
+ void FixupKills(MachineBasicBlock *MBB);
+
+ private:
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void StartBlockForKills(MachineBasicBlock *BB);
+
+ // ToggleKillFlag - Toggle a register operand kill flag. Other
+ // adjustments may be made to the instruction if necessary. Return
+ // true if the operand has been deleted, false if not.
+ bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+ };
+}
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+ KillIndices(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ if (EnablePostRAScheduler.getPosition() > 0) {
+ if (!EnablePostRAScheduler)
+ return false;
+ } else {
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ return false;
+ }
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+ ":BB#" << MBB->getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.StartBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = llvm::prior(I);
+ if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.FinishBlock();
+
+ // Update register kills
+ Scheduler.FixupKills(MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::StartBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ if (AntiDepBreak != NULL) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+ InsertPosIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+ if (AntiDepBreak != NULL)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::FinishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+ // Initialize the indices to indicate that no registers are live.
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+ KillIndices[i] = ~0u;
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ else {
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ KillIndices[Reg] = BB->size();
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = BB->size();
+ }
+ }
+ }
+ }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+ MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (KillIndices[MO.getReg()] != ~0u) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and clear MO's kill flag; only if all subregs are dead does
+ // MO stay marked as the kill.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ false /*IsDead*/));
+ AllDead = false;
+ }
+ }
+
+ if (AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+/// FixupKills - Fix register kill flags that may have been made incorrect by
+/// instruction reordering.
+///
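+/// For example (illustrative): if scheduling reorders the uses of a register
+/// so that a different instruction becomes its last use, the stale kill flag
+/// must be cleared and the new last use marked as the kill instead.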
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ std::set<unsigned> killedRegs;
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+ StartBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defed but not used in this
+ // instruction are now dead. Mark the register and all of its subregs as
+ // no longer live, since they are completely defined here.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ KillIndices[Reg] = ~0u;
+
+ // Repeat for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = ~0u;
+ }
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use.
+ killedRegs.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ bool kill = false;
+ if (killedRegs.find(Reg) == killedRegs.end()) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ if (KillIndices[*Subreg] != ~0u) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If subreg is not live, then register is killed if it became
+ // live in this instruction
+ if (kill)
+ kill = (KillIndices[Reg] == ~0u);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
+ }
+
+ killedRegs.insert(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+ KillIndices[Reg] = Count;
+
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ KillIndices[*Subreg] = Count;
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+ // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ bool available = SUnits[i].Preds.empty();
+ if (available) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = 0;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
+ return new PostRAScheduler(OptLevel);
+}
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 000000000000..c04d65637c94
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,295 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+
+using namespace llvm;
+
+char ProcessImplicitDefs::ID = 0;
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool
+ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
+ switch(OpIdx) {
+ case 1:
+ return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ case 2:
+ return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ default: return false;
+ }
+}
+
+static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
+ if (MI->isCopy()) {
+ MachineOperand &MO0 = MI->getOperand(0);
+ MachineOperand &MO1 = MI->getOperand(1);
+ if (MO1.getReg() != Reg)
+ return false;
+ if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ return true;
+ return false;
+ }
+ return false;
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
+/// there is one implicit_def for each use. Add isUndef marker to
+/// implicit_def defs and their uses.
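+/// For example (illustrative):
+///   %reg1024<def> = IMPLICIT_DEF
+///   %reg1025<def> = COPY %reg1024<kill>
+/// The COPY reads an undefined value, so it is itself turned into an
+/// IMPLICIT_DEF of %reg1025, and remaining uses of such registers are marked
+/// with the <undef> flag.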
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)fn.getFunction())->getName() << '\n');
+
+ bool Changed = false;
+
+ TII = fn.getTarget().getInstrInfo();
+ TRI = fn.getTarget().getRegisterInfo();
+ MRI = &fn.getRegInfo();
+ LV = &getAnalysis<LiveVariables>();
+
+ SmallSet<unsigned, 8> ImpDefRegs;
+ SmallVector<MachineInstr*, 8> ImpDefMIs;
+ SmallVector<MachineInstr*, 4> RUses;
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
+
+ MachineBasicBlock *Entry = fn.begin();
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isImplicitDef()) {
+ if (MI->getOperand(0).getSubReg())
+ continue;
+ unsigned Reg = MI->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ ImpDefRegs.insert(*SS);
+ }
+ ImpDefMIs.push_back(MI);
+ continue;
+ }
+
+ // Eliminate %reg1032:sub<def> = COPY undef.
+ if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ MachineOperand &MO = MI->getOperand(1);
+ if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
+ if (MO.isKill()) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
+ vi.removeKill(MI);
+ }
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ }
+
+ bool ChangedToImpDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!ImpDefRegs.count(Reg))
+ continue;
+ // If the use is a copy, just turn it into an implicit_def.
+ if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
+ bool isKill = MO.isKill();
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ if (isKill) {
+ ImpDefRegs.erase(Reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
+ ChangedToImpDef = true;
+ Changed = true;
+ break;
+ }
+
+ Changed = true;
+ MO.setIsUndef();
+ // This is a partial register redef of an implicit def.
+ // Make sure the whole register is defined by the instruction.
+ if (MO.isDef()) {
+ MI->addRegisterDefined(Reg);
+ continue;
+ }
+ if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
+ // Make sure other uses of this register in this instruction are also
+ // marked undef.
+ for (unsigned j = i+1; j != e; ++j) {
+ MachineOperand &MOJ = MI->getOperand(j);
+ if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ MOJ.setIsUndef();
+ }
+ ImpDefRegs.erase(Reg);
+ }
+ }
+
+ if (ChangedToImpDef) {
+ // Backtrack to process this new implicit_def.
+ --I;
+ } else {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ ImpDefRegs.erase(MO.getReg());
+ }
+ }
+ }
+
+ // Any outstanding liveout implicit_def's?
+ for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = ImpDefMIs[i];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !ImpDefRegs.count(Reg)) {
+ // Delete all "local" implicit_def's. That includes those which define
+ // physical registers, since they cannot be live out.
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If there are multiple defs of the same register and at least one
+ // is not an implicit_def, do not insert implicit_def's before the
+ // uses.
+ bool Skip = false;
+ SmallVector<MachineInstr*, 4> DeadImpDefs;
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DeadImpDef = &*DI;
+ if (!DeadImpDef->isImplicitDef()) {
+ Skip = true;
+ break;
+ }
+ DeadImpDefs.push_back(DeadImpDef);
+ }
+ if (Skip)
+ continue;
+
+ // The only implicit_def's we want to keep are those that are live
+ // out of their block.
+ for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+ DeadImpDefs[j]->eraseFromParent();
+ Changed = true;
+
+ // Process each use instruction once.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI.getOperand().isUndef())
+ continue;
+ MachineInstr *RMI = &*UI;
+ if (ModInsts.insert(RMI))
+ RUses.push_back(RMI);
+ }
+
+ for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+ MachineInstr *RMI = RUses[i];
+
+ // Turn a copy use into an implicit_def.
+ if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
+ RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+
+ bool isKill = false;
+ SmallVector<unsigned, 4> Ops;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ Ops.push_back(j);
+ if (RRMO.isKill())
+ isKill = true;
+ }
+ }
+ // Leave the other operands alone.
+ for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+ unsigned OpIdx = Ops[j];
+ RMI->RemoveOperand(OpIdx-j);
+ }
+
+ // Update LiveVariables varinfo if the instruction is a kill.
+ if (isKill) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
+ continue;
+ }
+
+ // Replace Reg with a new vreg that's marked implicit.
+ const TargetRegisterClass* RC = MRI->getRegClass(Reg);
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
+ bool isKill = true;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ RRMO.setReg(NewVReg);
+ RRMO.setIsUndef();
+ if (isKill) {
+ // Only the first operand of NewVReg is marked kill.
+ RRMO.setIsKill();
+ isKill = false;
+ }
+ }
+ }
+ }
+ RUses.clear();
+ ModInsts.clear();
+ }
+ ImpDefRegs.clear();
+ ImpDefMIs.clear();
+ }
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 000000000000..a901c5fefa3e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,852 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill code
+ // for any callee saved registers that are modified.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - With shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - Without shrink wrapping (default), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->hasFnAttr(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn);
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers.
+ if (CSRegs == 0 || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
+ true,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->getDesc().isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
+ blockCSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
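+ // e.g. with Offset = 13 and Align = 8: (13 + 8 - 1) / 8 * 8 = 16.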
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address of
+ // the object -- which is given by the offset. For a downward-growing stack
+ // the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
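+ // Buffers that may overflow (those for which MayNeedStackProtector returns
+ // true) are given offsets immediately after the protector slot, so an
+ // overflow tends to run into the canary before reaching other frame data.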
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+ !RegInfo->useFPForScavengingIndex(Fn))) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If argument space for call sites has been reserved immediately on entry
+ // to the current function, count it as part of the overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(Offset - LocalAreaOffset);
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+
+ // Add prologue to the function...
+ TFI.emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn())
+ TFI.emitEpilogue(Fn, *I);
+ }
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
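+ // Setup and destroy adjust SP in opposite directions, so flip the sign
+ // for one of them; a matched pair then contributes zero to SPAdj.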
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = llvm::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFI()) {
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(BB);
+
+ unsigned VirtReg = 0;
+ unsigned ScratchReg = 0;
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ ++NumVirtualFrameRegs;
+
+ // Have we already allocated a scratch register for this virtual?
+ if (Reg != VirtReg) {
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ VirtReg = Reg;
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+ ++NumScavengedRegs;
+ }
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert (ScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(ScratchReg);
+
+ }
+ }
+ RS->forward(I);
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 000000000000..e2391591ad06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,177 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..73b66d868f3d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,134 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+ // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4];
+ sys::Mutex Lock; // Guards FSValues, but not the values inside it.
+ std::map<int, const PseudoSourceValue *> FSValues;
+
+ PSVGlobalsTy() : PSVs() {}
+ ~PSVGlobalsTy() {
+ for (std::map<int, const PseudoSourceValue *>::iterator
+ I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
+ delete I->second;
+ }
+ }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+} // anonymous namespace
+
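+// Accessors for the four statically allocated PseudoSourceValues. Their order
+// in the PSVs array must match PSVNames below; printCustom() relies on that
+// correspondence.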
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &PSVGlobals->PSVs[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &PSVGlobals->PSVs[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &PSVGlobals->PSVs[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &PSVGlobals->PSVs[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+// FIXME: THIS IS A HACK!!!!
+// Eventually these should be uniqued on LLVMContext rather than in a managed
+// static. For now, we can safely use the global context to squeak by.
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+ Value(Type::getInt8PtrTy(getGlobalContext()),
+ Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[this - PSVGlobals->PSVs];
+}
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ PSVGlobalsTy &PG = *PSVGlobals;
+ sys::ScopedLock locked(PG.Lock);
+ const PseudoSourceValue *&V = PG.FSValues[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return false;
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (this == getStack() ||
+ this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+ return true;
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return false;
+ return true;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ // Negative frame indices are used for special things that don't
+ // appear in LLVM IR. Non-negative indices may be used for things
+ // like static allocas.
+ if (!MFI)
+ return FI >= 0;
+ // Spill slots should not alias others.
+ return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..031642117efc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,193 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include "RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+ LiveIntervalUnion::Allocator UnionAllocator;
+
+ // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
+ // registers may have changed.
+ unsigned UserTag;
+
+protected:
+ // Array of LiveIntervalUnions indexed by physical register.
+ class LiveUnionArray {
+ unsigned NumRegs;
+ LiveIntervalUnion *Array;
+ public:
+ LiveUnionArray(): NumRegs(0), Array(0) {}
+ ~LiveUnionArray() { clear(); }
+
+ unsigned numRegs() const { return NumRegs; }
+
+ void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned PhysReg) {
+ assert(PhysReg < NumRegs && "physReg out of bounds");
+ return Array[PhysReg];
+ }
+ };
+
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+ LiveUnionArray PhysReg2LiveUnion;
+
+ // Current queries, one per physreg. They must be reinitialized each time we
+ // query on a new live virtual register.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+ // Get an initialized query to check interferences between lvr and preg. Note
+ // that Query::init must be called at least once for each physical register
+ // before querying a new live virtual register. This ties Queries and
+ // PhysReg2LiveUnion together.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+ Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
+ return Queries[PhysReg];
+ }
+
+ // Invalidate all cached information about virtual registers - live ranges may
+ // have changed.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ //
+ // If an implementation wants to override the LiveInterval comparator, we
+ // should modify this interface to allow passing in an instance derived from
+ // LiveVirtRegQueue.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progress by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // A RegAlloc pass should call this when PassManager releases its memory.
+ virtual void releaseMemory();
+
+ // Helper for checking interference between a live virtual register and a
+ // physical register, including all its register aliases. If an interference
+ // exists, return the interfering register, which may be preg or an alias.
+ unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+ /// assign - Assign VirtReg to PhysReg.
+ /// This should not be called from selectOrSplit for the current register.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+ /// This can be invoked from selectOrSplit, but be careful to guarantee that
+ /// allocation is making progress.
+ void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ // Helper for spilling all live virtual registers currently unified under preg
+ // that interfere with the most recently queried lvr. Return true if spilling
+ // was successful, and append any new spilled/split intervals to splitLVRs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// addMBBLiveIns - Add physreg liveins to basic blocks.
+ void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+ // Verify each LiveIntervalUnion.
+ void verify();
+#endif
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..5ea26adc7644
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,587 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "LiveDebugVariables.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ LiveStacks *LS;
+ RenderMachineFunction *RMF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual void enqueue(LiveInterval *LI) {
+ Queue.push(LI);
+ }
+
+ virtual LiveInterval *dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ DEBUG(AU.addRequired<RenderMachineFunction>());
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
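+// Storage is obtained with malloc and each element is constructed with
+// placement new; LiveUnionArray::clear() runs the destructors and frees it.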
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Helper for spillInterferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+ assert(Q.seenAllInterferences() && "need collectInterferences()");
+ const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+ for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+ E = PendingSpills.end(); I != E; ++I) {
+ LiveInterval &SpilledVReg = **I;
+ DEBUG(dbgs() << "extracting from " <<
+ TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ unassign(SpilledVReg, PhysReg);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ spiller().spill(LRE);
+ }
+ // After extracting segments, the query's results are invalid. But keep the
+ // contents valid until we're done accessing pendingSpills.
+ Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ unsigned NumInterferences = 0;
+ // Collect interferences assigned to any alias of the physical register.
+ for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+ NumInterferences += QAlias.collectInterferingVRegs();
+ if (QAlias.seenUnspillableVReg()) {
+ return false;
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(NumInterferences > 0 && "expect interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ spillReg(VirtReg, *AliasI, SplitVRegs);
+ return true;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
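+ // Walk the union's segments and the basic blocks in parallel; a segment
+ // covering the start of a block makes PhysReg live into that block.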
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we find
+// an available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ ArrayRef<unsigned> Order =
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+ for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
+ ++I) {
+ unsigned PhysReg = *I;
+
+ // Check interference and, as a side effect, initialize queries for this
+ // VirtReg and its aliases.
+ unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+ if (interfReg == 0) {
+ // Found an available register.
+ return PhysReg;
+ }
+ LiveInterval *interferingVirtReg =
+ Queries[interfReg].firstInterference().liveUnionPos().value();
+
+ // The current VirtReg must either be spillable, or one of its interferences
+ // must have less spill weight.
+ if (interferingVirtReg->weight < VirtReg.weight ) {
+ PhysRegSpillCands.push_back(PhysReg);
+ }
+ }
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+ assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ addMBBLiveIns(MF);
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ // optional HTML output
+ DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+ // FIXME: Verification currently must run before VirtRegRewriter. We should
+ // make the rewriter a separate pass and override verifyAnalysis instead. When
+ // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+ if (VerifyEnabled) {
+ // Verify accuracy of LiveIntervals. The standard machine code verifier
+ // ensures that each LiveInterval covers all uses of the virtual reg.
+
+ // FIXME: MachineVerifier is badly broken when using the standard
+ // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+ // inline spiller, some tests fail to verify because the coalescer does not
+ // always generate verifiable code.
+ MF->verify(this, "In RABasic::verify");
+
+ // Verify that the LiveIntervals are partitioned into unions and are disjoint
+ // within each union.
+ verify();
+ }
+#endif // !NDEBUG
+
+ // Run rewriter
+ VRM->rewrite(LIS->getSlotIndexes());
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..b36a445291b7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1073 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
+ Dirty(false) {}
+ };
+
+ typedef DenseMap<unsigned, LiveReg> LiveRegMap;
+ typedef LiveRegMap::value_type LiveRegEntry;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
+
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+ // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+ // A register state may also be a virtual register number, indicating that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
+
+ // UsedInInstr - BitVector of physregs that are used in the current
+ // instruction, and so cannot be allocated.
+ BitVector UsedInInstr;
+
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
+
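+ // Relative costs used by calcSpillCost() when deciding which physical
+ // register is cheapest to take over.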
+ enum {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
+ public:
+ virtual const char *getPassName() const {
+ return "Fast Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &Fn);
+ void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
+ void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineInstr *MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+///
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // Check for non-debug uses or defs following MO.
+ // This is the most likely way to fail - fast path it.
+ MachineOperand *Next = &MO;
+ while ((Next = Next->getNextOperandForReg()))
+ if (!Next->isDebug())
+ return false;
+
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+}
+
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ }
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(LRI->second);
+ const LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ PhysRegState[LR.PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
+
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
+}
+
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = LR.LastUse != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
+ int FI = getStackSpaceFor(LRI->first, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
+
+ // If this register is used by a DBG_VALUE then insert a new DBG_VALUE that
+ // identifies the spill slot as the place to find the corresponding
+ // variable's value.
+ SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ }
+ }
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to it; they all point to the spilled value now.
+ LRIDbgValues.clear();
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineInstr *MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ UsedInInstr.set(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling and clearing out PhysReg and its
+// aliases so that the register is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (UsedInInstr.test(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ return spillImpossible;
+ }
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default:
+ return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ }
+
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ if (UsedInInstr.test(Alias))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default:
+ Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ return Cost;
+}
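+// Editor's note (worked example, not part of the original patch): for a
+// disabled PhysReg whose aliases are one free register and one register
+// holding a dirty virtreg, the loop above returns 1 + 100 = 101. If instead
+// any alias is reserved, or is already marked in UsedInInstr, the function
+// returns spillImpossible and allocVirtReg() will never pick PhysReg as its
+// best candidate.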
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
+ PhysRegState[PhysReg] = LRE.first;
+ assert(!LRE.second.PhysReg && "Already assigned a physreg");
+ LRE.second.PhysReg = PhysReg;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
+ const unsigned VirtReg = LRE.first;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
+ return assignVirtToPhysReg(LRE, Hint);
+ }
+ }
+
+ ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
+
+ // First try to find a completely free register.
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+ return assignVirtToPhysReg(LRE, PhysReg);
+ }
+
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0)
+ return assignVirtToPhysReg(LRE, *I);
+ if (Cost < BestCost)
+ BestReg = *I, BestCost = Cost;
+ }
+
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ return assignVirtToPhysReg(LRE, BestReg);
+ }
+
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ assignVirtToPhysReg(LRE, *AO.begin());
+}
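+// Editor's note (descriptive comment, not part of the original patch): the
+// selection order above is: (1) a valid hint whose spill cost is below
+// spillDirty, clearing it first if the cost is non-zero; (2) the first
+// completely free register in the RegClassInfo allocation order; (3) the
+// register with the lowest calcSpillCost(), taking a zero-cost register
+// immediately; (4) if every candidate is spillImpossible, report
+// "ran out of registers" and reuse the first register in the order anyway.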
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ }
+ allocVirtReg(MI, *LRI, Hint);
+ } else if (LR.LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
+ addKillFlag(LR);
+ }
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ LR.Dirty = true;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ allocVirtReg(MI, *LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LR.PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LR.Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || MO.isDead();
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+ return MO.isDead();
+}
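+// Editor's note (descriptive comment, not part of the original patch): for a
+// subregister operand such as a hypothetical %vreg7:sub_lo assigned to
+// physreg R, the operand is rewritten to TRI->getSubReg(R, sub_lo) and the
+// subreg index is cleared. Per the comment above, a kill flag on the narrow
+// operand is promoted to a kill of the full register R.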
+
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ if (ThroughRegs.count(PhysRegState[Reg]))
+ definePhysReg(MI, Reg, regFree);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ UsedInInstr.set(*AS);
+ if (ThroughRegs.count(PhysRegState[*AS]))
+ definePhysReg(MI, *AS, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->second.PhysReg);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
+ UsedInInstr.set(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ UsedInInstr.set(PartialDefs[i]);
+}
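+// Editor's note (descriptive comment, not part of the original patch): a
+// "through" register here is a virtual register that must occupy the same
+// physical register both before and after the instruction: tied use/def
+// pairs, early-clobber operands, and partial redefinitions that also read the
+// register. Allocating them in this pre-pass, together with the physdef
+// collision check at the top of the function, keeps later physreg defs from
+// taking the register they were assigned.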
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ // FIXME: This should probably be added by instruction selection instead?
+ // If the last instruction in the block is a return, make sure to mark it as
+ // using all of the live-out values in the function. Things marked both call
+ // and return are tail calls; do not do this for them. The tail callee need
+ // not take the same registers as input that it produces as output, and there
+ // are dependencies for its input registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+ !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register.");
+
+ // Add live-out registers as implicit uses.
+ Ret->addRegisterKilled(*I, TRI, true);
+ }
+ }
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (RegClassInfo.isAllocatable(*I))
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ while (MII != MBB->end()) {
+ MachineInstr *MI = MII++;
+ const MCInstrDesc &MCID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default:
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
+ if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ dbgs() << "*";
+ assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
+ "Bad inverse map");
+ break;
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->second.PhysReg] == i->first &&
+ "Bad inverse map");
+ }
+ });
+
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveDbgValueMap[Reg].push_back(MI);
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->second.PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else {
+ // We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ }
+ }
+ }
+ }
+ // Next instruction.
+ continue;
+ }
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.reset();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->second.PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ }
+ }
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.reset();
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
+ }
+
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (MCID.isCall()) {
+ // Spill all virtregs before a call. This serves two purposes: 1. If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&MCID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // cancel coalescing;
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)Fn.getFunction())->getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ RegClassInfo.runOnMachineFunction(Fn);
+ UsedInInstr.resize(TRI->getNumRegs());
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Make sure the set of used physregs is closed under subreg operations.
+ MRI->closePhysRegsUsed(*TRI);
+
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const unsigned *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..e235e87b54f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1429 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ SlotIndexes *Indexes;
+ LiveStacks *LS;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ RS_New, ///< Never seen before.
+ RS_First, ///< First time in the queue.
+ RS_Second, ///< Second time in the queue.
+ RS_Global, ///< Produced by global splitting.
+ RS_Local, ///< Produced by local splitting.
+ RS_Spill ///< Produced by spilling.
+ };
+
+ static const char *const StageName[];
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = (*Begin)->reg;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
+ };
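+ // Editor's note (descriptive comment, not part of the original patch): the
+ // ordering above compares broken hints before spill weight, so an eviction
+ // that breaks no hints but removes a range of weight 5.0 is considered
+ // cheaper than one that breaks a single hint to remove a range of weight
+ // 0.5. This is what lets canEvictInterference() and tryEvict() prefer
+ // hint-preserving evictions.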
+
+ // splitting state.
+ std::auto_ptr<SplitAnalysis> SA;
+ std::auto_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ unsigned PhysReg;
+ InterferenceCache::Cursor Intf;
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+ virtual Spiller &spiller() { return *SpillerInstance; }
+ virtual void enqueue(LiveInterval *LI);
+ virtual LiveInterval *dequeue();
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ void LRE_WillEraseInstruction(MachineInstr*);
+ bool LRE_CanEraseVirtReg(unsigned);
+ void LRE_WillShrinkVirtReg(unsigned);
+ void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
+ SmallVectorImpl<LiveInterval*>&);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_First",
+ "RS_Second",
+ "RS_Global",
+ "RS_Local",
+ "RS_Spill"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // LRE itself will remove from SlotIndexes and parent basic block.
+ VRM->RemoveMachineInstrFromMaps(MI);
+}
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
+ unassign(LIS->getInterval(VirtReg), PhysReg);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ if (!PhysReg)
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ unassign(LI, PhysReg);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. Ensure that the new register gets the
+ // same stage as the parent.
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+ RegAllocBase::releaseMemory();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+ // The queue holds (size, reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_First;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Second)
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated. Long ranges are allocated last so
+ // they are split against realistic interference.
+ Prio = (1u << 31) - Size;
+ else {
+ // Everything else is allocated in long->short order. Long ranges that don't
+ // fit should be spilled ASAP so they don't create interference.
+ Prio = (1u << 31) + Size;
+
+ // Boost ranges that have a physical register hint.
+ if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+ Prio |= (1u << 30);
+ }
+
+ Queue.push(std::make_pair(Prio, Reg));
+}
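+// Editor's note (worked example, not part of the original patch): the queue is
+// a max-heap on Prio, so deferred RS_Second ranges (priority just below 2^31)
+// always come out after everything else (priority of at least 2^31). Within
+// the non-deferred group, a physreg hint sets bit 30 and boosts the range
+// ahead of unhinted ranges; otherwise longer ranges are dequeued first. Among
+// the deferred ranges, longer ranges are dequeued last, matching the comment
+// above about splitting them against realistic interference.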
+
+LiveInterval *RAGreedy::dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ Queue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint(PhysReg))
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) <= RS_Second;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted more cheaply than MaxCost.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ // If there are 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
+ return false;
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Spill)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i];
+ unassign(*Intf, VRM->getPhys(Intf->reg));
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+ }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
+ unsigned BestPhys = 0;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint(PhysReg))
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ float StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+ BC.Entry = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.first() < BI.FirstUse)
+ BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.first() < BI.LastUse)
+ ++Ins;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+ BC.Exit = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.last() > BI.LastUse)
+ BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.last() > BI.FirstUse)
+ ++Ins;
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ if (Ins)
+ StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias, it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
+
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ B = 0;
+ }
+ }
+
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of the newly positive bundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+ addThroughConstraints(Cand.Intf,
+ ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo));
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ const LiveInterval &LI = SA->getParent();
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(Number);
+ LiveInterval::const_iterator I = LI.find(Start);
+ assert(I != LI.end() && "Expected live-in value");
+ // Is there a different live-out value? If so, we need an extra spill
+ // instruction.
+ if (I->end < Stop)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
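+ // getBundle(N, 0) is the bundle entering block N and getBundle(N, 1) the
+ // one leaving it; a set bit in LiveBundles means the value should be in a
+ // register across that bundle.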
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ if (Ins)
+ GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
+ GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed by
+/// SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
+ GlobalSplitCandidate &Cand,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ const BitVector &LiveBundles = Cand.LiveBundles;
+
+ DEBUG({
+ dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
+ << " with bundles";
+ for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+
+ InterferenceCache::Cursor &Intf = Cand.Intf;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+
+ // Create the main cross-block interval.
+ const unsigned MainIntv = SE->openIntv();
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ bool RegIn = BI.LiveIn &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = BI.LiveOut &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!RegIn && !RegOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (!BI.isOneInstr()) {
+ SE->splitSingleBlock(BI);
+ SE->selectIntv(MainIntv);
+ }
+ continue;
+ }
+
+ Intf.moveToBlock(BI.MBB->getNumber());
+
+ if (RegIn && RegOut)
+ SE->splitLiveThroughBlock(BI.MBB->getNumber(),
+ MainIntv, Intf.first(),
+ MainIntv, Intf.last());
+ else if (RegIn)
+ SE->splitRegInBlock(BI, MainIntv, Intf.first());
+ else
+ SE->splitRegOutBlock(BI, MainIntv, Intf.last());
+ }
+
+ // Handle live-through blocks.
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ Intf.moveToBlock(Number);
+ SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(),
+ RegOut ? MainIntv : 0, Intf.last());
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = *LREdit.get(i);
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Global);
+ continue;
+ }
+
+ // Main interval. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] == MainIntv) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ setStage(Reg, RS_Global);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ float BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ const unsigned NoCand = ~0u;
+ unsigned BestCand = NoCand;
+ unsigned NumCands = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = " << Cost << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
+ }
+ ++NumCands;
+ }
+
+ if (BestCand == NoCand)
+ return 0;
+
+ splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
+ SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstUse to LastUse,
+ // so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - a phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstUse to LastUse. We should make
+ // sure that we don't do anything illegal to such an interval, though.
+
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << '\n';
+ });
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() >= RS_Local. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Local,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
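+ // For illustration: a range with 3 instructions (NumGaps == 2) can first be
+ // split 2+3 (counting the COPY). If the resulting register interval covers
+ // as many gaps as the original, it is tagged RS_Local below, so its next
+ // split is forced to make progress; strictly smaller results are left
+ // RS_New and may be split again under rule 1.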
+ bool ProgressRequired = getStage(VirtReg) >= RS_Local;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
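+ // The new interval will hold NewGaps+1 instructions (the uses kept plus an
+ // entry/exit copy when the value is live before/after), each weighted by
+ // the block frequency and normalized by the estimated interval size.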
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Local so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(*LREdit.get(i), RS_Local);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ return tryLocalSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ // Don't iterate global splitting.
+ // Move straight to spilling if this range was produced by a global split.
+ if (getStage(VirtReg) >= RS_Global)
+ return 0;
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks.
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Then isolate blocks with multiple uses.
+ SplitAnalysis::BlockPtrSet Blocks;
+ if (SA->getMultiUseBlocks(Blocks)) {
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+ SE->splitSingleBlocks(Blocks);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ }
+
+ // Don't assign any physregs.
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Second ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Second)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage == RS_First) {
+ setStage(VirtReg, RS_Second);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(&VirtReg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Spill || !VirtReg.isSpillable())
+ return ~0u;
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+
+ allocatePhysRegs();
+ addMBBLiveIns(MF);
+ LIS->addKillFlags();
+
+ // Run rewriter
+ {
+ NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+ VRM->rewrite(Indexes);
+ }
+
+ // Write out new DBG_VALUE instructions.
+ DebugVars->emitDebugValues(VRM);
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 000000000000..0dd3c598c154
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1544 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
+#include "Spiller.h"
+#include "RegisterCoalescer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <queue>
+#include <memory>
+#include <cmath>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+ cl::desc("Attempt trivial coalescing of interval ends"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+AvoidWAWHazard("avoid-waw-hazard",
+ cl::desc("Avoid write-write hazards for some register classes"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ // When we allocate a register, add it to a fixed-size queue of
+ // registers to skip in subsequent allocations. This trades a small
+ // amount of register pressure and increased spills for flexibility in
+ // the post-pass scheduler.
+ //
+ // Note that the number of registers used for reloading spills
+ // will be one greater than the value of this option.
+ //
+ // One big limitation of this is that it doesn't differentiate between
+ // different register classes. So on x86-64, if there is xmm register
+ // pressure, it can cause fewer GPRs to be held in the queue.
+ static cl::opt<unsigned>
+ NumRecentlyUsedRegs("linearscan-skip-count",
+ cl::desc("Number of registers for linearscan to remember"
+ "to skip."),
+ cl::init(0),
+ cl::Hidden);
+
+ struct RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(
+ *PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+
+ // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0)
+ RecentRegs.resize(NumRecentlyUsedRegs, 0);
+ RecentNext = RecentRegs.begin();
+ avoidWAW_ = 0;
+ }
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ BitVector reservedRegs_;
+ LiveIntervals* li_;
+ MachineLoopInfo *loopInfo;
+ RegisterClassInfo RegClassInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole in their live range at the current point.
+ IntervalPtrs inactive_;
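+ // Only active_ intervals occupy their assigned physical register in the
+ // regUse_ counters; an interval moved to inactive_ releases the register
+ // and re-claims it if it becomes active again.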
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
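+ /// unhandled_ - Intervals that have not been processed yet, popped in
+ /// order of increasing start point.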
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ // The queue of recently-used registers.
+ SmallVector<unsigned, 4> RecentRegs;
+ SmallVector<unsigned, 4>::iterator RecentNext;
+
+ // Last write-after-write register written.
+ unsigned avoidWAW_;
+
+ // Record that we just picked this register.
+ void recordRecentlyUsed(unsigned reg) {
+ assert(reg != 0 && "Recently used register is NOREG!");
+ if (!RecentRegs.empty()) {
+ *RecentNext++ = reg;
+ if (RecentNext == RecentRegs.end())
+ RecentNext = RecentRegs.begin();
+ }
+ }
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequiredID(LiveStacksID);
+ AU.addPreservedID(LiveStacksID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ // Determine if we skip this register due to its being recently used.
+ bool isRecentlyUsed(unsigned reg) const {
+ return reg == avoidWAW_ ||
+ std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
+ }
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(SlotIndex CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(SlotIndex CurPoint);
+
+ /// hasNextReloadInterval - Return the next liveinterval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+ /// try to allocate the definition to the same register as the source,
+ /// if the register is not defined during the lifetime of the interval.
+ /// This eliminates a copy, and is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ dbgs() << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ llvm_unreachable(0);
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// getFirstNonReservedPhysReg - return the first non-reserved physical
+ /// register in the register class.
+ unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
+ ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+ assert(!O.empty() && "All registers reserved?!");
+ return O.front();
+ }
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ DEBUG({
+ if (str)
+ dbgs() << str << " intervals:\n";
+
+ for (; i != e; ++i) {
+ dbgs() << '\t' << *i->first << " -> ";
+
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg))
+ reg = vrm_->getPhys(reg);
+
+ dbgs() << tri_->getName(reg) << '\n';
+ }
+ });
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+ const TargetRegisterClass *AliasClass =
+ OneClassForEachPhysReg.lookup(*AS);
+ if (AliasClass)
+ RelatedRegClasses.unionSets(I->second, AliasClass);
+ }
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try to
+/// allocate the definition to the same register as the source register if that
+/// register is not defined during the lifetime of the interval. If the interval is
+/// killed by a copy, try to use the destination register. This eliminates a
+/// copy. This is used to coalesce copies which were not coalesced away before
+/// allocation either due to dest and src being in different register classes or
+/// because the coalescer was overly conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ // We cannot handle complicated live ranges. Simple linear stuff only.
+ if (cur.ranges.size() != 1)
+ return Reg;
+
+ const LiveRange &range = cur.ranges.front();
+
+ VNInfo *vni = range.valno;
+ if (vni->isUnused() || !vni->def.isValid())
+ return Reg;
+
+ unsigned CandReg;
+ {
+ MachineInstr *CopyMI;
+ if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ // Defined by a copy, try to extend SrcReg forward
+ CandReg = CopyMI->getOperand(1).getReg();
+ else if (TrivCoalesceEnds &&
+ (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
+ // Only used by a copy, try to extend DstReg backwards
+ CandReg = CopyMI->getOperand(0).getReg();
+ else
+ return Reg;
+
+ // If the target of the copy is a sub-register then don't coalesce.
+ if (CopyMI->getOperand(0).getSubReg())
+ return Reg;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+ if (!vrm_->isAssignedReg(CandReg))
+ return Reg;
+ CandReg = vrm_->getPhys(CandReg);
+ }
+ if (Reg == CandReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(CandReg))
+ return Reg;
+
+ if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+ return Reg;
+
+ // Try to coalesce.
+ DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ << '\n');
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, CandReg);
+
+ ++NumCoalesce;
+ return CandReg;
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
+ initIntervalSets();
+
+ linearScan();
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ }
+ } else {
+ if (i->second->empty()) {
+ assignRegOrStackSlotAtInterval(i->second);
+ }
+ else
+ unhandled_.push(i->second);
+ }
+ }
+}
+
+void RALinScan::linearScan() {
+ // linear scan algorithm
+ DEBUG({
+ dbgs() << "********** LINEAR SCAN **********\n"
+ << "********** Function: "
+ << mf_->getFunction()->getName() << '\n';
+ printIntervals("fixed", fixed_.begin(), fixed_.end());
+ });
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
+
+ assert(!cur->empty() && "Empty interval in unhandled set.");
+
+ processActiveIntervals(cur->beginIndex());
+ processInactiveIntervals(cur->beginIndex());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+
+ // Allocating a virtual register: try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG({
+ printIntervals("active", active_.begin(), active_.end());
+ printIntervals("inactive", inactive_.begin(), inactive_.end());
+ });
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG({
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ dbgs() << "\tinterval " << *i->first << " expired\n";
+ });
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+ // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Adding a virtual register to livein set?");
+ LiveInMBBs[i]->addLiveIn(Reg);
+ }
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DEBUG(dbgs() << *vrm_);
+
+ // Look for physical registers that end up not being allocated even though
+ // the register allocator had to spill other registers in its register class.
+ if (!vrm_->FindUnusedRegisters(li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing active intervals:\n");
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
+{
+ DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // move re-activated intervals in active list
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by the given weight.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If the alias is a super-register, and the super-register is in the
+ // register class we are trying to allocate, then add the weight to all
+ // sub-registers of the super-register, even if they are not aliases.
+ // e.g. allocating for GR32, bh is not used, updating bl spill weight.
+ // bl should get the same spill weight otherwise it will be chosen
+ // as a spill candidate since spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and the defs and uses of Reg, weighted by loop depth.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += std::pow(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+ // We have figured out the *best* register to spill. But there are other
+ // registers that are pretty good as well (spill weight within 3%). Spill
+ // the one that has the fewest defs and uses that conflict with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DEBUG({
+ dbgs() << "\tConsidering " << NumCands << " candidates: ";
+ for (unsigned i = 0; i != NumCands; ++i)
+ dbgs() << tri_->getName(Candidates[i].first) << " ";
+ dbgs() << "\n";
+ });
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
+ unsigned BestCandidate = 0;
+ float MinConflicts = Conflicts[0];
+ for (unsigned i = 1; i != NumCands; ++i) {
+ if (Conflicts[i] < MinConflicts) {
+ BestCandidate = i;
+ MinConflicts = Conflicts[i];
+ }
+ }
+
+ std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+ std::back_inserter(SpillIntervals));
+}
+
+namespace {
+ struct WeightCompare {
+ private:
+ const RALinScan &Allocator;
+
+ public:
+ WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+
+ typedef std::pair<unsigned, float> RegWeightPair;
+ bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+ return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
+ }
+ };
+}
+
+static bool weightsAreClose(float w1, float w2) {
+ if (!NewHeuristic)
+ return false;
+
+ float diff = w1 - w2;
+ if (diff <= 0.02f) // Within 0.02f
+ return true;
+ return (diff / w2) <= 0.05f; // Within 5%.
+}
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+ bool isNew = DowngradedRegs.insert(*AS);
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginIndex() < B->beginIndex();
+ }
+ };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ DEBUG(dbgs() << "\tallocating current interval from "
+ << RC->getName() << ": ");
+
+ // This is an implicitly defined live interval; just assign any register.
+ if (cur->empty()) {
+ unsigned physReg = vrm_->getRegAllocPref(cur->reg);
+ if (!physReg)
+ physReg = getFirstNonReservedPhysReg(RC);
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ // Note the register is not really in use.
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ return;
+ }
+
+ backUpRegUses();
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ SlotIndex StartPosition = cur->beginIndex();
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ // If start of this live interval is defined by a move instruction and its
+ // source is assigned a physical register that is compatible with the target
+ // register class, then we should try to assign it the same register.
+ // This can happen when the move is from a larger register class to a smaller
+ // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
+ VNInfo *vni = cur->begin()->valno;
+ if (!vni->isUnused() && vni->def.isValid()) {
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ if (CopyMI && CopyMI->isCopy()) {
+ unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
+ unsigned SrcReg = CopyMI->getOperand(1).getReg();
+ unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
+ }
+ }
+ }
+ }
+
+ // For every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ unsigned BestPhysReg = physReg;
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ SmallSet<unsigned, 8> RegAliases;
+ for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // regUse_ so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endIndex() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated regUse_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ restoreRegUses();
+
+ // If we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ assert(RC->contains(physReg) && "Invalid candidate");
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+
+ // Remember physReg for avoiding a write-after-write hazard in the next
+ // instruction.
+ if (AvoidWAWHazard &&
+ tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
+ avoidWAW_ = physReg;
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
+ DowngradeRegister(cur, physReg);
+ }
+ return;
+ }
+ DEBUG(dbgs() << "no free registers\n");
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+ // for each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+ }
+
+ DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+ unsigned minReg = 0;
+
+ bool Found = false;
+ std::vector<std::pair<unsigned,float> > RegsWeights;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
+ float regWeight = SpillWeights[reg];
+ // Skip recently allocated registers and reserved registers.
+ if (minWeight > regWeight && !isRecentlyUsed(reg))
+ Found = true;
+ RegsWeights.push_back(std::make_pair(reg, regWeight));
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!Found) {
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
+      // No need to worry about whether the alias register is smaller than
+      // the registers in RC; we are going to spill all registers that alias
+      // it anyway.
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+ RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+ }
+ }
+
+ // Sort all potential spill candidates by weight.
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        // spillPhysRegAroundRegDefsUses may have invalidated iterators
+        // stored in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ assert(false && "Ran out of registers during register allocation!");
+ report_fatal_error("Ran out of registers during register allocation!");
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
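// Editorial note: an illustrative, self-contained trace of the trimming loop
// above (not part of the original patch), with the closeness rule from
// weightsAreClose inlined and assuming the NewHeuristic path where that rule
// can actually return true. RegsWeights is already sorted by weight, so
// RegsWeights[0].second is minWeight.
#include <utility>
#include <vector>

static unsigned trimSpillCandidatesSketch(
    const std::vector<std::pair<unsigned, float> > &RegsWeights) {
  float minWeight = RegsWeights[0].second;
  unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
  while (LastCandidate > 1) {
    float w = RegsWeights[LastCandidate - 1].second;
    float diff = w - minWeight;
    bool close = diff <= 0.02f || (diff / minWeight) <= 0.05f;
    if (close)
      break;                     // Keep this candidate and everything below.
    --LastCandidate;
  }
  return LastCandidate;
}
// With weights {1.0, 1.015, 3.0} this returns 2: the heaviest candidate is far
// from the minimum (200% heavier) and is dropped, while the second one is
// within 0.02 of the minimum and survives.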
+
+ DEBUG({
+ dbgs() << "\t\tregister(s) with min weight(s): ";
+
+ for (unsigned i = 0; i != LastCandidate; ++i)
+ dbgs() << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n";
+ });
+
+  // If the current interval has the minimum weight, we need to spill it,
+  // add any added intervals back to unhandled, and restart
+  // linear scan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
+ SmallVector<LiveInterval*, 8> added;
+ LiveRangeEdit LRE(*cur, added);
+ spiller_->spill(LRE);
+
+ std::sort(added.begin(), added.end(), LISorter());
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
+ // point.
+    // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
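// Editorial note: an illustrative, self-contained model of the NextReloadMap
// bookkeeping in the merge loop above, not part of the original patch. The
// register numbers, stack slots and block numbers are made up; the point is
// that consecutive reloads of the same stack slot in the same basic block get
// chained so the allocator can try to give them the same physical register.
#include <map>
#include <vector>

struct ReloadSketch {
  unsigned VReg;      // Virtual register defined by the reload.
  int      StackSlot; // Stack slot being reloaded.
  unsigned MBBNum;    // Basic block containing the reload.
};

static std::map<unsigned, unsigned>
buildNextReloadMapSketch(const std::vector<ReloadSketch> &Reloads) {
  // Reloads are assumed to be sorted by position, as the added intervals are.
  std::map<unsigned, unsigned> NextReload;
  for (unsigned i = 1, e = Reloads.size(); i != e; ++i) {
    const ReloadSketch &Prev = Reloads[i - 1];
    const ReloadSketch &Cur  = Reloads[i];
    if (Prev.MBBNum == Cur.MBBNum && Prev.StackSlot == Cur.StackSlot)
      NextReload[Prev.VReg] = Cur.VReg;  // Chain reload i-1 to reload i.
  }
  return NextReload;
}
// For reloads {v10, ss#3, bb0}, {v11, ss#3, bb0}, {v12, ss#5, bb1} this yields
// the single entry v10 -> v11; the reload from ss#5 starts a new chain.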
+
+ ++NumBacktracks;
+
+ // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go right back to the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with
+  // minimum weight, roll back to the interval with the earliest
+  // start point and let the linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to rollback properly)
+ SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates how far back
+  // in handled we need to roll back.
+ assert(!spillIs.empty() && "No spill intervals?");
+ SlotIndex earliestStart = spillIs[0]->beginIndex();
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation. We also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ SmallVector<LiveInterval*, 8> added;
+ while (!spillIs.empty()) {
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
+ if (sli->beginIndex() < earliestStart)
+ earliestStart = sli->beginIndex();
+ LiveRangeEdit LRE(*sli, added, 0, &spillIs);
+ spiller_->spill(LRE);
+ spilled.insert(sli->reg);
+ }
+
+ // Include any added intervals in earliestStart.
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ SlotIndex SI = added[i]->beginIndex();
+ if (SI < earliestStart)
+ earliestStart = SI;
+ }
+
+ DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+ if (!i->empty() && i->beginIndex() < earliestStart)
+ break;
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy.
+      // Clear the preference now since the source interval allocation may
+      // have been undone as well.
+ mri_->setRegAllocationHint(i->reg, 0, 0);
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after earliestStart
+  // and insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginIndex())) {
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ SlotIndex ReloadIdx = ReloadLi->beginIndex();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginIndex() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
+
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ ArrayRef<unsigned> Order;
+ if (Hint.first)
+ Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+ else
+ Order = RegClassInfo.getOrder(RC);
+
+ assert(!Order.empty() && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
+ // Skip recently allocated registers.
+ if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+ // Remember what register we picked so we can skip it next time.
+ if (FreeReg != 0) recordRecentlyUsed(FreeReg);
+ return FreeReg;
+ }
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
+ if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg] &&
+ (!SkipDGRegs || !isRecentlyUsed(Reg))) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ // Remember what register we picked so we can skip it next time.
+ recordRecentlyUsed(FreeReg);
+
+ return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ SmallVector<unsigned, 256> inactiveCounts;
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ if (inactiveCounts.size() <= reg)
+ inactiveCounts.resize(reg+1);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+ // If copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
+ }
+
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..72230d4b0c5c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,723 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "RegisterCoalescer.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+ cl::desc("Pre-split before PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+ typedef std::set<unsigned> RegSet;
+
+
+ std::auto_ptr<PBQPBuilder> builder;
+
+ char *customPassID;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ RegSet vregsToAlloc, emptyIntervalVRegs;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+  void addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri);
+
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+
+}
+
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
+
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
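// Editorial note: an illustrative sketch (not part of the original patch) of
// the option numbering used by getPRegForOption above, with a plain vector
// standing in for the AllowedSet. Option 0 is the spill option; option k with
// k >= 1 selects the k-th register in the allowed set.
#include <cassert>
#include <vector>

static unsigned pregForOptionSketch(const std::vector<unsigned> &allowed,
                                    unsigned option) {
  assert(option >= 1 && option <= allowed.size() && "Not a preg option.");
  return allowed[option - 1];
}
// With an allowed set {R3, R5, R7} (any register numbers), option 1 maps to
// R3 and option 3 maps to R7, while option 0 means "spill" and has no
// physical register at all.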
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
+
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ pregs.insert(itr->first);
+ mri->setPhysRegUsed(itr->first);
+ }
+ }
+
+ BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
+ if (!reservedRegs.test(preg)) {
+ vrAllowed.push_back(preg);
+ }
+ }
+
+ // Remove any physical registers which overlap.
+ for (RegSet::const_iterator pregItr = pregs.begin(),
+ pregEnd = pregs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
+ const LiveInterval *pregLI = &lis->getInterval(preg);
+
+ if (pregLI->empty()) {
+ continue;
+ }
+
+ if (!vregLI->overlaps(*pregLI)) {
+ continue;
+ }
+
+ // Remove the register from the allowed set.
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), preg);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+
+ // Also remove any aliases.
+ const unsigned *aliasItr = tri->getAliasSet(preg);
+ if (aliasItr != 0) {
+ for (; *aliasItr != 0; ++aliasItr) {
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
+
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
+ }
+ }
+}
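// Editorial note: an illustrative, self-contained picture (not part of the
// original patch) of the cost structure built by addSpillCosts and
// addInterferenceCosts above, using nested vectors of doubles instead of the
// PBQP matrix type and plain equality in place of
// TargetRegisterInfo::regsOverlap. Row and column 0 are the spill option.
#include <limits>
#include <vector>

typedef std::vector<std::vector<double> > CostMatrixSketch;

static CostMatrixSketch
interferenceMatrixSketch(const std::vector<unsigned> &vr1Allowed,
                         const std::vector<unsigned> &vr2Allowed) {
  const double inf = std::numeric_limits<double>::infinity();
  CostMatrixSketch m(vr1Allowed.size() + 1,
                     std::vector<double>(vr2Allowed.size() + 1, 0.0));
  for (unsigned i = 0; i != vr1Allowed.size(); ++i)
    for (unsigned j = 0; j != vr2Allowed.size(); ++j)
      if (vr1Allowed[i] == vr2Allowed[j])   // Stand-in for regsOverlap().
        m[i + 1][j + 1] = inf;              // Forbid clashing assignments.
  return m;
}
// If both vregs may use {R1, R2}, the 3x3 matrix is zero everywhere except for
// infinite entries at (1,1) and (2,2): the solver can still spill either vreg
// (row/column 0) or give the two vregs different registers for free.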
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
+
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+  // gives the OK.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
+
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
+
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
+
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
+
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
+
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
+
+ if (cp.isPhys()) {
+ if (!lis->isAllocatable(dst)) {
+ continue;
+ }
+
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
+ }
+ }
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
+
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
+ au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
+ }
+ }
+}
+
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
+ return;
+ }
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0) {
+ vni = stackInterval.getValNumInfo(0);
+ } else {
+ vni = stackInterval.getNextValue(
+ SlotIndex(), 0, lss->getVNInfoAllocator());
+ }
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ const LiveInterval* spillInterval = &lis->getInterval(vreg);
+ double oldWeight = spillInterval->weight;
+ rmf->rememberUseDefs(spillInterval);
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+ rmf->rememberSpills(spillInterval, newSpills);
+
+ (void) oldWeight;
+ DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ << oldWeight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+ assert(!(*itr)->empty() && "Empty spill range.");
+ DEBUG(dbgs() << (*itr)->reg << " ");
+ vregsToAlloc.insert((*itr)->reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ } else {
+ assert(false && "Unknown allocation option.");
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+ itr != end; ++itr) {
+ LiveInterval *li = &lis->getInterval(*itr);
+
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ } else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ } else {
+ // Ranges which are assigned a stack slot only are ignored.
+ continue;
+ }
+
+ if (reg == 0) {
+ // Filter out zero regs - they're for intervals that were spilled.
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+ rmf = &getAnalysis<RenderMachineFunction>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+
+ rmf->renderMachineFunction("After PBQP register allocation.", vrm);
+
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 000000000000..5a77e47bc591
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,112 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) {
+ TRI = MF->getTarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (const unsigned *AS = TRI->getOverlaps(Reg);
+ unsigned Alias = *AS; ++AS)
+ CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ BitVector RR = TRI->getReservedRegs(*MF);
+ if (RR != Reserved)
+ Update = true;
+ Reserved = RR;
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
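// Editorial note: an illustrative, self-contained model (not part of the
// original patch) of the CSRNum table built above. The register numbers and
// the overlap table are made up; each overlap list is assumed to contain the
// register itself, as TRI->getOverlaps() does. Entry 0 means "does not alias
// any CSR"; entry k means "aliases CalleeSaved[k-1]".
#include <vector>

static std::vector<unsigned char>
buildCSRNumSketch(unsigned NumRegs, const std::vector<unsigned> &CSR,
                  const std::vector<std::vector<unsigned> > &Overlaps) {
  std::vector<unsigned char> CSRNum(NumRegs, 0);
  for (unsigned N = 0; N != CSR.size(); ++N) {
    const std::vector<unsigned> &AS = Overlaps[CSR[N]];
    for (unsigned i = 0; i != AS.size(); ++i)
      CSRNum[AS[i]] = (unsigned char)(N + 1); // 1-based index into CSR.
  }
  return CSRNum;
}
// If CSR = {7, 9} and register 8 overlaps register 7, the result has
// CSRNum[7] = 1, CSRNum[8] = 1 and CSRNum[9] = 2; every other entry stays 0,
// matching the "last overlapping CSR wins" behaviour of the loop above.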
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new unsigned[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<unsigned, 16> CSRAlias;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else
+ RCI.Order[N++] = PhysReg;
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+ // CSR aliases go after the volatile registers, preserve the target's order.
+ std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << " ]\n";
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
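// Editorial note: an illustrative, self-contained version (not part of the
// original patch) of the ordering policy implemented by compute() above, with
// plain vectors standing in for the target queries. Reserved registers are
// dropped, non-CSR ("volatile") registers keep their raw order, and CSR
// aliases are appended at the end in that same order.
#include <vector>

static std::vector<unsigned>
allocationOrderSketch(const std::vector<unsigned> &RawOrder,
                      const std::vector<bool> &Reserved,
                      const std::vector<unsigned char> &CSRNum) {
  std::vector<unsigned> Order, CSRAlias;
  for (unsigned i = 0; i != RawOrder.size(); ++i) {
    unsigned PhysReg = RawOrder[i];
    if (Reserved[PhysReg])
      continue;                    // Never hand out reserved registers.
    if (CSRNum[PhysReg])
      CSRAlias.push_back(PhysReg); // Aliases a callee-saved reg: goes last.
    else
      Order.push_back(PhysReg);    // Volatile register: keep raw position.
  }
  Order.insert(Order.end(), CSRAlias.begin(), CSRAlias.end());
  return Order;
}
// With a raw order of {1, 2, 3, 4}, register 2 reserved and register 4
// aliasing a CSR, the preferred order becomes {1, 3, 4}.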
+
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h
new file mode 100644
index 000000000000..d21fd67efe8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h
@@ -0,0 +1,121 @@
+//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H
+#define LLVM_CODEGEN_REGISTERCLASSINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo {
+ struct RCInfo {
+ unsigned Tag;
+ unsigned NumRegs;
+ OwningArrayPtr<unsigned> Order;
+
+ RCInfo() : Tag(0), NumRegs(0) {}
+ operator ArrayRef<unsigned>() const {
+ return ArrayRef<unsigned>(Order.get(), NumRegs);
+ }
+ };
+
+ // Brief cached information for each register class.
+ OwningArrayPtr<RCInfo> RegClass;
+
+ // Tag changes whenever cached information needs to be recomputed. An RCInfo
+ // entry is valid when its tag matches.
+ unsigned Tag;
+
+ const MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ // Callee saved registers of last MF. Assumed to be valid until the next
+  // runOnMachineFunction() call.
+ const unsigned *CalleeSaved;
+
+ // Map register number to CalleeSaved index + 1;
+ SmallVector<uint8_t, 4> CSRNum;
+
+ // Reserved registers in the current MF.
+ BitVector Reserved;
+
+ // Compute all information about RC.
+ void compute(const TargetRegisterClass *RC) const;
+
+ // Return an up-to-date RCInfo for RC.
+ const RCInfo &get(const TargetRegisterClass *RC) const {
+ const RCInfo &RCI = RegClass[RC->getID()];
+ if (Tag != RCI.Tag)
+ compute(RC);
+ return RCI;
+ }
+
+public:
+ RegisterClassInfo();
+
+  /// runOnMachineFunction - Prepare to answer questions about MF. This must
+  /// be called before any other methods are used.
+ void runOnMachineFunction(const MachineFunction &MF);
+
+ /// getNumAllocatableRegs - Returns the number of actually allocatable
+ /// registers in RC in the current function.
+ unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const {
+ return get(RC).NumRegs;
+ }
+
+ /// getOrder - Returns the preferred allocation order for RC. The order
+ /// contains no reserved registers, and registers that alias callee saved
+ /// registers come last.
+ ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
+ return get(RC);
+ }
+
+ /// getLastCalleeSavedAlias - Returns the last callee saved register that
+  /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR.
+ unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ if (unsigned N = CSRNum[PhysReg])
+ return CalleeSaved[N-1];
+ return 0;
+ }
+
+ /// isReserved - Returns true when PhysReg is a reserved register.
+ ///
+ /// Reserved registers may belong to an allocatable register class, but the
+ /// target has explicitly requested that they are not used.
+ ///
+ bool isReserved(unsigned PhysReg) const {
+ return Reserved.test(PhysReg);
+ }
+
+ /// isAllocatable - Returns true when PhysReg belongs to an allocatable
+ /// register class and it hasn't been reserved.
+ ///
+ /// Allocatable registers may show up in the allocation order of some virtual
+ /// register, so a register allocator needs to track its liveness and
+ /// availability.
+ bool isAllocatable(unsigned PhysReg) const {
+ return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
+ }
+};
+} // end namespace llvm
+
+#endif
+
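// Editorial note: a hedged usage sketch for this interface, not taken from an
// in-tree client. The MachineFunction and register class are assumed to come
// from the surrounding pass; only methods declared above are used.
#include "RegisterClassInfo.h"

namespace {
unsigned firstAllocatableRegSketch(llvm::RegisterClassInfo &RCI,
                                   const llvm::MachineFunction &MF,
                                   const llvm::TargetRegisterClass *RC) {
  RCI.runOnMachineFunction(MF);                 // Refresh the cached tables.
  llvm::ArrayRef<unsigned> Order = RCI.getOrder(RC);
  for (unsigned i = 0; i != Order.size(); ++i)
    if (RCI.isAllocatable(Order[i]))            // Reserved registers are
      return Order[i];                          // already filtered out.
  return 0;                                     // Nothing allocatable.
}
} // end anonymous namespace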
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..b91f92c6aa5a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,1797 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface by all clients and implementations of
+// register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regcoalescing"
+#include "RegisterCoalescer.h"
+#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePhysicalJoin("join-physregs",
+ cl::desc("Join physical register copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
+ if (!a) return b;
+ if (!b) return a;
+ return tri.composeSubRegIndices(a, b);
+}
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = compose(tri, MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ srcReg_ = dstReg_ = subIdx_ = 0;
+ newRC_ = 0;
+ flipped_ = crossClass_ = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ partial_ = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ flipped_ = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = tri_.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // For now we only handle the case of identical indices in commensurate
+ // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
+ // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
+ if (SrcSub != DstSub)
+ return false;
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (!getCommonSubClass(DstRC, SrcRC))
+ return false;
+ SrcSub = DstSub = 0;
+ }
+
+ // There can be no SrcSub left; if the source still carries a sub-register
+ // index, flip the pair so the index ends up on the destination side.
+ if (SrcSub) {
+ std::swap(Src, Dst);
+ DstSub = SrcSub;
+ SrcSub = 0;
+ assert(!flipped_ && "Unexpected flip");
+ flipped_ = true;
+ }
+
+ // Find the new register class.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (DstSub)
+ newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ else
+ newRC_ = getCommonSubClass(DstRC, SrcRC);
+ if (!newRC_)
+ return false;
+ crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ srcReg_ = Src;
+ dstReg_ = Dst;
+ subIdx_ = DstSub;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ return false;
+ std::swap(srcReg_, dstReg_);
+ flipped_ = !flipped_;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is srcReg_.
+ if (Dst == srcReg_) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != srcReg_) {
+ return false;
+ }
+
+ // Now check that Dst matches dstReg_.
+ if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = tri_.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return dstReg_ == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ } else {
+ // dstReg_ is virtual.
+ if (dstReg_ != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return compose(tri_, subIdx_, SrcSub) == DstSub;
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
+ /// Joined copies are not deleted immediately, but kept in JoinedCopies.
+ JoinedCopies.insert(CopyMI);
+
+ /// Mark all register operands of CopyMI as <undef> so they won't affect dead
+ /// code elimination.
+ for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+ E = CopyMI->operands_end(); I != E; ++I)
+ if (I->isReg())
+ I->setIsUndef(true);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->isDefByCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->hasRedefByEC())
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its aliases is overlapping the live interval of the virtual register.
+ // If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
+ DEBUG({
+ dbgs() << "\t\tInterfere with alias ";
+ li_->getInterval(*AS).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Extending: ";
+ IntB.print(dbgs(), tri_);
+ });
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->setCopy(0);
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0,
+ li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ // If B1 is killed by a PHI, then the merged live range must also be killed
+ // by the same PHI, as B0 and B1 can not overlap.
+ bool HasPHIKill = BValNo->hasPHIKill();
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ if (HasPHIKill)
+ ValLR->valno->setHasPHIKill(true);
+ }
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (ALR->end == CopyIdx)
+ li_->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
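+ // Walk the IntB ranges that could overlap [AI->start, AI->end); any
+ // overlapping range that belongs to a value number other than BValNo is a
+ // definition of IntB that can reach uses of AValNo.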
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
+
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
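+ // After commuting, the operand currently at NewDstIdx moves into the slot
+ // tied to the def, so the def is effectively rewritten in terms of it. The
+ // checks below require that operand to be IntB.reg and a kill, matching the
+ // pattern described in the comment above.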
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // Abort if the aliases of IntB.reg have values that are not simply the
+ // clobbers from the superreg.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+ return false;
+
+ // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ mri_->use_nodbg_begin(IntA.reg),
+ UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *tri_);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ markAsJoined(UseMI);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->setCopy(0);
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerialize the definition.
+bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
+ unsigned DstReg,
+ unsigned DstSubIdx,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isAsCheapAsAMove())
+ return false;
+ if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (mri_->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ // If destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
+ if (DstSubIdx) {
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC =
+ DstRC->getSubRegisterRegClass(DstSubIdx);
+ const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_);
+ if (DefRC == DstRC)
+ DstSubIdx = 0;
+ else if (DefRC != DstSubRC)
+ return false;
+ }
+
+ RemoveCopyFlag(DstReg, CopyMI);
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+ MachineInstr *NewMI = prior(MII);
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
+ }
+
+ NewMI->copyImplicitOps(CopyMI);
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
+ return true;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ // Update LiveDebugVariables.
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
+ if (DstIsPhys) {
+ if (UseMI->isCopy() &&
+ !UseMI->getOperand(1).getSubReg() &&
+ !UseMI->getOperand(0).getSubReg() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
+ UseMI->getOperand(0).getReg(), 0, UseMI))
+ continue;
+ }
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *tri_);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *tri_);
+ }
+
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, tri_);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, tri_);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, tri_);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it and also remove the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
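+ // CopyMI is going away (deleted or replaced by a rematerialized def), so any
+ // value number it defines in DstReg, or in an aliased physreg, must no
+ // longer claim to be defined by a copy.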
+ SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ if (li_->hasInterval(DstReg)) {
+ LiveInterval &LI = li_->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = li_->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+}
+
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// We need to be careful about coalescing a source physical register with a
+/// virtual register. Once the coalescing is done, it cannot be broken and these
+/// are not spillable! If the destination interval uses are far away, think
+/// twice about coalescing them!
+bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
+ bool Allocatable = li_->isAllocatable(CP.getDstReg());
+ LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+ /// Always join simple intervals that are defined by a single copy from a
+ /// reserved register. This doesn't increase register pressure, so it is
+ /// always beneficial.
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ if (!EnablePhysicalJoin) {
+ DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+ return false;
+ }
+
+ // Only coalesce to allocatable physreg, we don't want to risk modifying
+ // reserved registers.
+ if (!Allocatable) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (li_->hasInterval(CP.getDstReg()) &&
+ li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
+
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial()) {
+ const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold) {
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
+ // This heuristic is good enough in practice, but it's obviously not *right*.
+ // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+ // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+ // Early exit if the function is fairly small, coalesce aggressively if
+ // that's the case. For really special register classes with 3 or
+ // fewer registers, be a bit more careful.
+ (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+
+ // Coalesce aggressively if the intervals are small compared to the number of
+ // registers in the new class. The number 4 is fairly arbitrary, chosen to be
+ // less aggressive than the 8 used for the whole function size.
+ const unsigned ThresSize = 4 * NewRCCount;
+ if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ return true;
+
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+ mri_->use_nodbg_end());
+ unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+ mri_->use_nodbg_end());
+ unsigned NewUses = SrcUses + DstUses;
+ unsigned NewSize = SrcSize + DstSize;
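+ // The checks below compare use density normalized by the number of
+ // allocatable registers: abort if (NewUses/NewSize)/NewRCCount is more than
+ // twice (SrcUses/SrcSize)/SrcRCCount, and likewise for the Dst side.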
+ if (SrcRC != NewRC && SrcSize > ThresSize) {
+ unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
+ if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > ThresSize) {
+ unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
+ if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+ return false;
+ }
+ return true;
+}
+
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*tii_, *tri_);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // If they are already joined, there is nothing left to do.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
+ return false; // Not coalescable.
+ }
+
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
+ << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ << "\n");
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ if (!shouldJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
+ return false;
+ }
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ mri_->getRegClass(CP.getSrcReg()),
+ mri_->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+ li_->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!JoinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // We may be coalescing to a virtual register in a sub-register class of the
+ // other; make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Remember to delete the copy instruction.
+ markAsJoined(CopyMI);
+
+ UpdateRegDefsUses(CP);
+
+ // If we have extended the live range of a physical register, make sure we
+ // update live-in lists as well.
+ if (CP.isPhys()) {
+ SmallVector<MachineBasicBlock*, 16> BlockSeq;
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
+ li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+ for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+ MachineBasicBlock &block = *BlockSeq[idx];
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
+ }
+ BlockSeq.clear();
+ }
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ li_->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+ DEBUG({
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
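+/// A sketch of the recursion with illustrative value numbers: if B's val#1 is
+/// a copy of A's val#2, and A's val#2 is in turn a copy of B's val#0, then
+/// resolving B#1 first marks it as in progress (-2), recurses into A#2, which
+/// recurses into B#0; B#0 is not a copy from A, so it gets a fresh slot in
+/// NewVNInfo, and that slot is propagated back to A#2 and B#1.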
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+
+// Find out if we have something like
+// A = X
+// B = X
+// if so, we can pretend this is actually
+// A = X
+// B = A
+// which allows us to coalesce A and B.
+// VNI is the definition of B. LR is the live range of A that includes
+// the slot just before B. If we return true, we add "B = X" to DupCopies.
+static bool RegistersDefinedFromSameValue(LiveIntervals &li,
+ const TargetRegisterInfo &tri,
+ CoalescerPair &CP,
+ VNInfo *VNI,
+ LiveRange *LR,
+ SmallVector<MachineInstr*, 8> &DupCopies) {
+ // FIXME: This is very conservative. For example, we don't handle
+ // physical registers.
+
+ MachineInstr *MI = VNI->getCopy();
+
+ if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ return false;
+
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(Src) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst))
+ return false;
+
+ unsigned A = CP.getDstReg();
+ unsigned B = CP.getSrcReg();
+
+ if (B == Dst)
+ std::swap(A, B);
+ assert(Dst == A);
+
+ VNInfo *Other = LR->valno;
+ if (!Other->isDefByCopy())
+ return false;
+ const MachineInstr *OtherMI = Other->getCopy();
+
+ if (!OtherMI->isFullCopy())
+ return false;
+
+ unsigned OtherDst = OtherMI->getOperand(0).getReg();
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
+ !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ return false;
+
+ assert(OtherDst == B);
+
+ if (Src != OtherSrc)
+ return false;
+
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def))
+ return false;
+
+ DupCopies.push_back(MI);
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+ // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
+ return false;
+ }
+
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ SmallVector<MachineInstr*, 8> DupCopies;
+
+ LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+ }
+
+ while (I != IE && J != JE) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
+ }
+
+ if (I->end < J->end)
+ ++I;
+ else
+ ++J;
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
+ }
+
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
+ SmallVector<unsigned, 8> SourceRegisters;
+ for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+ E = DupCopies.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+
+ // We have pretended that the assignment to B in
+ // A = X
+ // B = X
+ // was actually a copy from A. Now that we decided to coalesce A and B,
+ // transform the code into
+ // A = X
+ // X = X
+ // and mark the X as coalesced to keep the illusion.
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+
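+ // MI is now an identity copy of Src ('X = X'); mark it as joined so the
+ // cleanup pass in runOnMachineFunction deletes it.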
+ markAsJoined(MI);
+ }
+
+ // If B = X was the last use of X in a liverange, we have to shrink it now
+ // that B = X is gone.
+ for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
+ E = SourceRegisters.end(); I != E; ++I) {
+ li_->shrinkToUses(&li_->getInterval(*I));
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
+ return true;
+}
+
+namespace {
+ // DepthMBBCompare - Comparison predicate that sort first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ SmallVector<MachineInstr*, 8> VirtCopies;
+ SmallVector<MachineInstr*, 8> PhysCopies;
+ SmallVector<MachineInstr*, 8> ImpDefCopies;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+ // If this isn't a copy or a subreg_to_reg, we can't join intervals.
+ unsigned SrcReg, DstReg;
+ if (Inst->isCopy()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->isSubregToReg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(Inst);
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(Inst);
+ else
+ VirtCopies.push_back(Inst);
+ }
+
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void RegisterCoalescer::joinIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+ std::vector<MachineInstr*> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+ // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ MachineInstr *&TheCopy = TryAgainList[i];
+ if (!TheCopy)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy= 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+}
+
+void RegisterCoalescer::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ ldv_ = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ if (VerifyCoalescing)
+ mf_->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ I != E; ++I){
+ I->second->print(dbgs(), tri_);
+ dbgs() << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ bool DoDelete = true;
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
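+ // A SUBREG_TO_REG-style copy reads its source from operand 2; a plain
+ // COPY reads it from operand 1.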
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
+ if (MI->allDefsAreDead()) {
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ li_->hasInterval(SrcReg))
+ li_->shrinkToUses(&li_->getInterval(SrcReg));
+ DoDelete = true;
+ }
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(tii_->get(TargetOpcode::KILL));
+ mii = llvm::next(mii);
+ } else {
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ }
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_nodbg_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+ ++mii;
+
+ // Check for now unnecessary kill flags.
+ if (li_->isNotInMIMap(MI)) continue;
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+ unsigned reg = MO.getReg();
+ if (!reg || !li_->hasInterval(reg)) continue;
+ if (!li_->getInterval(reg).killedAt(DefIdx)) {
+ MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = tri_->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, tri_);
+ }
+ }
+ }
+
+ DEBUG(dump());
+ DEBUG(ldv_->dump());
+ if (VerifyCoalescing)
+ mf_->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer *llvm::createRegisterCoalescer() {
+ return new RegisterCoalescer();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 000000000000..4131d91c00e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,243 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterClassInfo.h"
+#include "llvm/Support/IncludeFile.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
+
+namespace llvm {
+
+ class MachineFunction;
+ class RegallocQuery;
+ class AnalysisUsage;
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+ class LiveDebugVariables;
+ class VirtRegMap;
+ class MachineLoopInfo;
+
+ class CoalescerPair;
+
+ /// An abstract interface for register coalescers. Coalescers must
+ /// implement this interface to be part of the coalescer analysis
+ /// group.
+ class RegisterCoalescer : public MachineFunctionPass {
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ LiveIntervals *li_;
+ LiveDebugVariables *ldv_;
+ const MachineLoopInfo* loopInfo;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns true
+ /// if the copy was successfully coalesced away. If it is not currently
+ /// possible to coalesce this interval, but it may be possible if other
+ /// things get coalesced, then it returns true by reference in 'Again'.
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we can
+ /// use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg,
+ /// see if we can merge the two destination register value numbers into a
+ /// single value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+ /// computation, replace the copy by rematerializing the definition.
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, unsigned DstSubIdx,
+ MachineInstr *CopyMI);
+
+ /// shouldJoinPhys - Return true if a physreg copy should be joined.
+ bool shouldJoinPhys(CoalescerPair &CP);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(const CoalescerPair &CP);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+ /// markAsJoined - Remember that CopyMI has already been joined.
+ void markAsJoined(MachineInstr *CopyMI);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Register allocators must call this from their own
+ /// getAnalysisUsage to cover the case where the coalescer is not
+ /// a Pass in the proper sense and isn't managed by PassManager.
+ /// PassManager needs to know which analyses to make available and
+ /// which to invalidate when running the register allocator or any
+ /// pass that might call coalescing. The long-term solution is to
+ /// allow hierarchies of PassManagers.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
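+ // Usage sketch (illustrative, not part of the original interface): a
+ // register allocator typically requests coalescing like this, where
+ // MyRegAlloc is a hypothetical pass used only for the example.
+ //
+ // void MyRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const {
+ // AU.addRequired<RegisterCoalescer>(); // run coalescing first
+ // MachineFunctionPass::getAnalysisUsage(AU);
+ // }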
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+
+ /// dstReg_ - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned dstReg_;
+
+ /// srcReg_ - The virtual register that will be coalesced into dstReg_.
+ unsigned srcReg_;
+
+ /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
+ /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+ /// virtual register.
+ unsigned subIdx_;
+
+ /// partial_ - True when the original copy was a partial subregister copy.
+ bool partial_;
+
+ /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+ bool crossClass_;
+
+ /// flipped_ - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool flipped_;
+
+ /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// is a physreg.
+ const TargetRegisterClass *newRC_;
+
+ public:
+ CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+ : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+ partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+ /// because dstReg_ is a physical register, or subIdx_ is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !newRC_; }
+
+ /// isPartial - Return true if the original copy instruction did not copy the
+ /// full register, but was a subreg operation.
+ bool isPartial() const { return partial_; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return crossClass_; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return flipped_; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return dstReg_; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return srcReg_; }
+
+ /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+ /// coalesced into, or 0.
+ unsigned getSubIdx() const { return subIdx_; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return newRC_; }
+ };
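+ // Usage sketch (illustrative): the coalescer interrogates a CoalescerPair
+ // roughly as follows; CopyMI is some copy-like MachineInstr and
+ // attemptJoin is a hypothetical helper.
+ //
+ // CoalescerPair CP(*tii_, *tri_);
+ // if (CP.setRegisters(CopyMI) && !CP.isPhys())
+ // // CP.getSrcReg() may be merged into CP.getDstReg():CP.getSubIdx()
+ // attemptJoin(CP);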
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..9e9a145b0aa4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,395 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg) {
+ RegsAvailable.reset(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ RegsAvailable.reset(SubReg);
+}
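+// Illustrative note: on a target where D0 overlaps S0 and S1 (ARM-style
+// names, used here only as an example), setUsed(D0) also clears the
+// availability bits of S0 and S1 through the sub-register loop above.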
+
+bool RegScavenger::isAliasUsed(unsigned Reg) const {
+ if (isUsed(Reg))
+ return true;
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ if (isUsed(*R))
+ return true;
+ return false;
+}
+
+void RegScavenger::initRegState() {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+
+ // All registers start out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ if (!MBB)
+ return;
+
+ // Live-in registers are in use.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ // Pristine CSRs are also unavailable.
+ BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ // Self-initialize.
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
+ BV.set(Reg);
+ for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ BV.set(*R);
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ if (MI == ScavengeRestore) {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly, ignore "dead" markers.
+ bool isPred = TII->isPredicated(MI);
+ BitVector EarlyClobberRegs(NumPhysRegs);
+ BitVector KillRegs(NumPhysRegs);
+ BitVector DefRegs(NumPhysRegs);
+ BitVector DeadRegs(NumPhysRegs);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ // Two-address operands implicitly kill.
+ if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (!isPred && MO.isDead())
+ addRegWithSubRegs(DeadRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ if (MO.isEarlyClobber())
+ addRegWithAliases(EarlyClobberRegs, Reg);
+ }
+ }
+
+ // Verify uses and defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isUsed(Reg)) {
+ // Check if it's partially live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 then reads a partially undef value. This is not *incorrect* since
+ // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (isUsed(SubReg)) {
+ SubUsed = true;
+ break;
+ }
+ assert(SubUsed && "Using an undefined register!");
+ }
+ assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
+ "Using an early clobbered register!");
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUnused(DeadRegs);
+ setUsed(DefRegs);
+}
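+// Usage sketch (illustrative): a pass typically tracks liveness through a
+// block by entering it once and then stepping per instruction, e.g.
+//
+// RS->enterBasicBlock(&MBB);
+// for (unsigned n = 0, e = MBB.size(); n != e; ++n)
+// RS->forward(); // advance the state one instruction at a time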
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
+ return *I;
+ }
+ return 0;
+}
+
+/// getRegsAvailable - Return a bit mask of the registers in the register
+/// class RC that are currently available (no alias in use).
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+ return Mask;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StartMI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ Candidates.reset(MO.getReg());
+ for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ Candidates.reset(*R);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+
+ if ((Candidates & Available).any())
+ Candidates &= Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ assert(ScavengedReg == 0 &&
+ "Scavenger slot is live, unable to scavenge another register!");
+
+ // Avoid infinite regress.
+ ScavengedReg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+ II = prior(UseMI);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+ }
+
+ ScavengeRestore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
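+// Usage sketch (illustrative): targets commonly reach for this from frame
+// index elimination when no register is free, e.g.
+//
+// unsigned Tmp = RS->scavengeRegister(RC, II, SPAdj);
+//
+// where RC is the required register class, II the instruction that needs
+// the temporary and SPAdj the current stack pointer adjustment.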
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 000000000000..8b02ec44273a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1012 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+ cl::desc("Appended to function name to get output file name "
+ "(default: \".html\")"),
+ cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+ cl::desc("Comma separated list of functions to render"
+ ", or \"*\"."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+ cl::desc("Register classes to render pressure for."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+ cl::desc("Live intervals to show alongside code."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+ cl::desc("Don't display empty intervals."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+ cl::desc("Render indexes not associated with instructions or "
+ "MBB starts."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+ cl::desc("Use SVG for vertical text."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+ cl::desc("Pretty print HTML. For debugging the renderer only.."),
+ cl::init(false), cl::Hidden);
+
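+// Example (illustrative): assuming a register allocator that schedules this
+// pass, the hidden flags above combine on the llc command line roughly as
+//
+// llc -rmf-funcs=main -rmf-classes=GR32 -rmf-intervals=virt* input.ll
+//
+// which would render main() to main.html with GR32 pressure and all virtual
+// intervals shown. The function, class and file names are examples only.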
+
+namespace llvm {
+
+ bool MFRenderingOptions::renderingOptionsProcessed;
+ std::set<std::string> MFRenderingOptions::mfNamesToRender;
+ bool MFRenderingOptions::renderAllMFs = false;
+
+ std::set<std::string> MFRenderingOptions::classNamesToRender;
+ bool MFRenderingOptions::renderAllClasses = false;
+
+ std::set<std::pair<unsigned, unsigned> >
+ MFRenderingOptions::intervalNumsToRender;
+ unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+ template <typename OutputItr>
+ void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+ OutputItr outItr) {
+ std::string::const_iterator curPos = s.begin();
+ std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+ while (nextComa != s.end()) {
+ std::string elem;
+ std::copy(curPos, nextComa, std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ curPos = llvm::next(nextComa);
+ nextComa = std::find(curPos, s.end(), ',');
+ }
+
+ if (curPos != s.end()) {
+ std::string elem;
+ std::copy(curPos, s.end(), std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ }
+ }
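+ // Example (illustrative): splitComaSeperatedList("a,b,c", outItr) emits the
+ // elements "a", "b" and "c"; a trailing comma adds no empty element because
+ // the final copy only runs when characters remain after the last comma.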
+
+ void MFRenderingOptions::processOptions() {
+ if (!renderingOptionsProcessed) {
+ processFuncNames();
+ processRegClassNames();
+ processIntervalNumbers();
+ renderingOptionsProcessed = true;
+ }
+ }
+
+ void MFRenderingOptions::processFuncNames() {
+ if (machineFuncsToRender == "*") {
+ renderAllMFs = true;
+ } else {
+ splitComaSeperatedList(machineFuncsToRender,
+ std::inserter(mfNamesToRender,
+ mfNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processRegClassNames() {
+ if (pressureClasses == "*") {
+ renderAllClasses = true;
+ } else {
+ splitComaSeperatedList(pressureClasses,
+ std::inserter(classNamesToRender,
+ classNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processIntervalNumbers() {
+ std::set<std::string> intervalRanges;
+ splitComaSeperatedList(showIntervals,
+ std::inserter(intervalRanges,
+ intervalRanges.begin()));
+ std::for_each(intervalRanges.begin(), intervalRanges.end(),
+ processIntervalRange);
+ }
+
+ void MFRenderingOptions::processIntervalRange(
+ const std::string &intervalRangeStr) {
+ if (intervalRangeStr == "*") {
+ intervalTypesToRender |= All;
+ } else if (intervalRangeStr == "virt-nospills*") {
+ intervalTypesToRender |= VirtNoSpills;
+ } else if (intervalRangeStr == "spills*") {
+ intervalTypesToRender |= VirtSpills;
+ } else if (intervalRangeStr == "virt*") {
+ intervalTypesToRender |= AllVirt;
+ } else if (intervalRangeStr == "phys*") {
+ intervalTypesToRender |= AllPhys;
+ } else {
+ std::istringstream iss(intervalRangeStr);
+ unsigned reg1, reg2;
+ if ((iss >> reg1 >> std::ws)) {
+ if (iss.eof()) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+ } else {
+ char c;
+ iss >> c;
+ if (c == '-' && (iss >> reg2)) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+ } else {
+ dbgs() << "Warning: Invalid interval range \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ } else {
+ dbgs() << "Warning: Invalid interval number \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ }
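+ // Accepted forms, as parsed above (values are illustrative):
+ // "*", "virt*", "phys*", "virt-nospills*", "spills*" - category selectors
+ // "42" - render the interval for register 42 only
+ // "42-48" - render registers 42 through 48 inclusive
+ // so -rmf-intervals=virt-nospills*,100-110 mixes both styles.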
+
+ void MFRenderingOptions::setup(MachineFunction *mf,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis,
+ const RenderMachineFunction *rmf) {
+ this->mf = mf;
+ this->tri = tri;
+ this->lis = lis;
+ this->rmf = rmf;
+
+ clear();
+ }
+
+ void MFRenderingOptions::clear() {
+ regClassesTranslatedToCurrentFunction = false;
+ regClassSet.clear();
+
+ intervalsTranslatedToCurrentFunction = false;
+ intervalSet.clear();
+ }
+
+ void MFRenderingOptions::resetRenderSpecificOptions() {
+ intervalSet.clear();
+ intervalsTranslatedToCurrentFunction = false;
+ }
+
+ bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+ processOptions();
+
+ return (renderAllMFs ||
+ mfNamesToRender.find(mf->getFunction()->getName()) !=
+ mfNamesToRender.end());
+ }
+
+ const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+ translateRegClassNamesToCurrentFunction();
+ return regClassSet;
+ }
+
+ const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+ translateIntervalNumbersToCurrentFunction();
+ return intervalSet;
+ }
+
+ bool MFRenderingOptions::renderEmptyIndexes() const {
+ return showEmptyIndexes;
+ }
+
+ bool MFRenderingOptions::fancyVerticals() const {
+ return useFancyVerticals;
+ }
+
+ void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+ if (!regClassesTranslatedToCurrentFunction) {
+ processOptions();
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ if (renderAllClasses ||
+ classNamesToRender.find(trc->getName()) !=
+ classNamesToRender.end()) {
+ regClassSet.insert(trc);
+ }
+ }
+ regClassesTranslatedToCurrentFunction = true;
+ }
+ }
+
+ void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+ if (!intervalsTranslatedToCurrentFunction) {
+ processOptions();
+
+ // If we're not restricted to the explicit list, copy over all intervals of
+ // the matching types.
+ if (intervalTypesToRender != ExplicitOnly) {
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (filterEmpty && li->empty())
+ continue;
+
+ if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+ (intervalTypesToRender & AllPhys))) {
+ intervalSet.insert(li);
+ } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+ if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+ ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+ intervalSet.insert(li);
+ }
+ }
+ }
+ }
+
+ // If we need to process the explicit list...
+ if (intervalTypesToRender != All) {
+ for (std::set<std::pair<unsigned, unsigned> >::const_iterator
+ regRangeItr = intervalNumsToRender.begin(),
+ regRangeEnd = intervalNumsToRender.end();
+ regRangeItr != regRangeEnd; ++regRangeItr) {
+ const std::pair<unsigned, unsigned> &range = *regRangeItr;
+ for (unsigned reg = range.first; reg != range.second; ++reg) {
+ if (lis->hasInterval(reg)) {
+ intervalSet.insert(&lis->getInterval(reg));
+ }
+ }
+ }
+ }
+
+ intervalsTranslatedToCurrentFunction = true;
+ }
+ }
+
+ // ---------- TargetRegisterExtraInfo implementation ----------
+
+ TargetRegisterExtraInfo::TargetRegisterExtraInfo()
+ : mapsPopulated(false) {
+ }
+
+ void TargetRegisterExtraInfo::setup(MachineFunction *mf,
+ MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis) {
+ this->mf = mf;
+ this->mri = mri;
+ this->tri = tri;
+ this->lis = lis;
+ }
+
+ void TargetRegisterExtraInfo::reset() {
+ if (!mapsPopulated) {
+ initWorst();
+ //initBounds();
+ initCapacity();
+ mapsPopulated = true;
+ }
+
+ resetPressureAndLiveStates();
+ }
+
+ void TargetRegisterExtraInfo::clear() {
+ prWorst.clear();
+ vrWorst.clear();
+ capacityMap.clear();
+ pressureMap.clear();
+ //liveStatesMap.clear();
+ mapsPopulated = false;
+ }
+
+ void TargetRegisterExtraInfo::initWorst() {
+ assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
+ "Worst map already initialised?");
+
+ // Start with the physical registers.
+ for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
+ WorstMapLine &pregLine = prWorst[preg];
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ unsigned numOverlaps = 0;
+ for (TargetRegisterClass::iterator rItr = trc->begin(),
+ rEnd = trc->end();
+ rItr != rEnd; ++rItr) {
+ unsigned trcPReg = *rItr;
+ if (tri->regsOverlap(preg, trcPReg))
+ ++numOverlaps;
+ }
+
+ pregLine[trc] = numOverlaps;
+ }
+ }
+
+ // Now the register classes.
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rc1Itr != rcEnd; ++rc1Itr) {
+ const TargetRegisterClass *trc1 = *rc1Itr;
+ WorstMapLine &classLine = vrWorst[trc1];
+
+ for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+ rc2Itr != rcEnd; ++rc2Itr) {
+ const TargetRegisterClass *trc2 = *rc2Itr;
+
+ unsigned worst = 0;
+
+ for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+ trc1End = trc1->end();
+ trc1Itr != trc1End; ++trc1Itr) {
+ unsigned trc1Reg = *trc1Itr;
+ unsigned trc1RegWorst = 0;
+
+ for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+ trc2End = trc2->end();
+ trc2Itr != trc2End; ++trc2Itr) {
+ unsigned trc2Reg = *trc2Itr;
+ if (tri->regsOverlap(trc1Reg, trc2Reg))
+ ++trc1RegWorst;
+ }
+ if (trc1RegWorst > worst) {
+ worst = trc1RegWorst;
+ }
+ }
+
+ if (worst != 0) {
+ classLine[trc2] = worst;
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getWorst(
+ unsigned reg,
+ const TargetRegisterClass *trc) const {
+ const WorstMapLine *wml = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+ assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+ wml = &prwItr->second;
+ } else {
+ const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+ VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+ assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+ wml = &vrwItr->second;
+ }
+
+ WorstMapLine::const_iterator wmlItr = wml->find(trc);
+ if (wmlItr == wml->end())
+ return 0;
+
+ return wmlItr->second;
+ }
+
+ void TargetRegisterExtraInfo::initCapacity() {
+ assert(!mapsPopulated && capacityMap.empty() &&
+ "Capacity map already initialised?");
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ unsigned capacity = trc->getRawAllocationOrder(*mf).size();
+
+ if (capacity != 0)
+ capacityMap[trc] = capacity;
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getCapacity(
+ const TargetRegisterClass *trc) const {
+ CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+ assert(cmItr != capacityMap.end() &&
+ "vreg with unallocable register class");
+ return cmItr->second;
+ }
+
+ void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+ pressureMap.clear();
+ //liveStatesMap.clear();
+
+ // Iterate over all slots.
+
+
+ // Iterate over all live intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+ continue;
+
+ // For all ranges in the current interval.
+ for (LiveInterval::iterator lrItr = li->begin(),
+ lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+ LiveRange *lr = &*lrItr;
+
+ // For all slots in the current range.
+ for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+ // Record increased pressure at index for all overlapping classes.
+ for (TargetRegisterInfo::regclass_iterator
+ rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ if (trc->getRawAllocationOrder(*mf).empty())
+ continue;
+
+ unsigned worstAtI = getWorst(li->reg, trc);
+
+ if (worstAtI != 0) {
+ pressureMap[i][trc] += worstAtI;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ PressureMap::const_iterator pmItr = pressureMap.find(i);
+ if (pmItr == pressureMap.end())
+ return 0;
+ const PressureMapLine &pmLine = pmItr->second;
+ PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+ if (pmlItr == pmLine.end())
+ return 0;
+ return pmlItr->second;
+ }
+
+ bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ return (getPressureAtSlot(trc, i) > getCapacity(trc));
+ }
+
+ // ---------- MachineFunctionRenderer implementation ----------
+
+ void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+ if (!prettyHTML)
+ return;
+ for (unsigned i = 0; i < ns; ++i) {
+ os << " ";
+ }
+ }
+
+ RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+ return Spacer(ns);
+ }
+
+ raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+ s.print(os);
+ return os;
+ }
+
+ template <typename Iterator>
+ std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+ std::string r;
+
+ for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+ char c = *sItr;
+
+ switch (c) {
+ case '<': r.append("&lt;"); break;
+ case '>': r.append("&gt;"); break;
+ case '&': r.append("&amp;"); break;
+ case ' ': r.append("&nbsp;"); break;
+ case '\"': r.append("&quot;"); break;
+ default: r.push_back(c); break;
+ }
+ }
+
+ return r;
+ }
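+ // Example (illustrative): escapeChars applied to "<a b>" yields
+ // "&lt;a&nbsp;b&gt;", which is safe to embed in the generated HTML.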
+
+ RenderMachineFunction::LiveState
+ RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+ SlotIndex i) const {
+ const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+ // For uses/defs, recorded use/def indexes override current liveness and
+ // instruction operands (only for the interval which records the indexes).
+ if (i.isUse() || i.isDef()) {
+ UseDefs::const_iterator udItr = useDefs.find(li);
+ if (udItr != useDefs.end()) {
+ const SlotSet &slotSet = udItr->second;
+ if (slotSet.count(i)) {
+ if (i.isUse()) {
+ return Used;
+ }
+ // else
+ return Defined;
+ }
+ }
+ }
+
+ // If the slot is a load/store, or there's no info in the use/def set then
+ // use liveness and instruction operand info.
+ if (li->liveAt(i)) {
+
+ if (mi == 0) {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ } else {
+ if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ return Defined;
+ } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ return Used;
+ } else {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ }
+ }
+ }
+ return Dead;
+ }
+
+ RenderMachineFunction::PressureState
+ RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ if (trei.getPressureAtSlot(trc, i) == 0) {
+ return Zero;
+ } else if (trei.classOverCapacityAtSlot(trc, i)){
+ return High;
+ }
+ return Low;
+ }
+
+ /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ oss << *mi;
+
+ os << escapeChars(oss.str());
+ }
+
+ template <typename T>
+ void RenderMachineFunction::renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const {
+ if (ro.fancyVerticals()) {
+ os << indent << "<object\n"
+ << indent + s(2) << "class=\"obj\"\n"
+ << indent + s(2) << "type=\"image/svg+xml\"\n"
+ << indent + s(2) << "width=\"14px\"\n"
+ << indent + s(2) << "height=\"55px\"\n"
+ << indent + s(2) << "data=\"data:image/svg+xml,\n"
+ << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
+ << indent + s(6) << "<text x='-55' y='10' "
+ "font-family='Courier' font-size='12' "
+ "transform='rotate(-90)' "
+ "text-rendering='optimizeSpeed' "
+ "fill='#000'>" << t << "</text>\n"
+ << indent + s(4) << "</svg>\">\n"
+ << indent << "</object>\n";
+ } else {
+ std::ostringstream oss;
+ oss << t;
+ std::string tStr(oss.str());
+
+ os << indent;
+ for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
+ tStrItr != tStrEnd; ++tStrItr) {
+ os << *tStrItr << "<br/>";
+ }
+ os << "\n";
+ }
+ }
+
+ void RenderMachineFunction::insertCSS(const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<style type=\"text/css\">\n"
+ << indent + s(2) << "body { font-color: black; }\n"
+ << indent + s(2) << "table.code td { font-family: monospace; "
+ "border-width: 0px; border-style: solid; "
+ "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
+ << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
+ << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
+ << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
+ << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
+ << indent + s(2) << "table.code th { border-width: 0px; "
+ "border-style: solid; }\n"
+ << indent << "</style>\n";
+ }
+
+ void RenderMachineFunction::renderFunctionSummary(
+ const Spacer &indent, raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << indent << "<h1>Function: " << mf->getFunction()->getName()
+ << "</h1>\n"
+ << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
+ }
+
+
+ void RenderMachineFunction::renderPressureTableLegend(
+ const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
+ << indent << "<table class=\"code\">\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<th>Pressure</th><th>Description</th>"
+ "<th>Appearance</th>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>No Pressure</td>"
+ "<td>No physical registers of this class requested.</td>"
+ "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>Low Pressure</td>"
+ "<td>Sufficient physical registers to meet demand.</td>"
+ "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>High Pressure</td>"
+ "<td>Potentially insufficient physical registers to meet demand.</td>"
+ "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent << "</table>\n";
+ }
+
+ template <typename CellType>
+ void RenderMachineFunction::renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const {
+
+ if (rleAccumulator.second == 0)
+ return;
+
+ typename std::map<CellType, std::string>::const_iterator ctsItr =
+ cellTypeStrs.find(rleAccumulator.first);
+
+ assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
+
+ os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
+ if (rleAccumulator.second > 1)
+ os << " colspan=" << rleAccumulator.second;
+ os << "></td>\n";
+ }
+
+
+ void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const {
+
+ std::map<LiveState, std::string> lsStrs;
+ lsStrs[Dead] = "l-n";
+ lsStrs[Defined] = "l-d";
+ lsStrs[Used] = "l-u";
+ lsStrs[AliveReg] = "l-r";
+ lsStrs[AliveStack] = "l-s";
+
+ std::map<PressureState, std::string> psStrs;
+ psStrs[Zero] = "p-z";
+ psStrs[Low] = "p-l";
+ psStrs[High] = "p-h";
+
+ // Open the table...
+
+ os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
+ << indent + s(2) << "<tr>\n";
+
+ // Render the header row...
+
+ os << indent + s(4) << "<th>index</th>\n"
+ << indent + s(4) << "<th>instr</th>\n";
+
+ // Render class names if necessary...
+ if (!ro.regClasses().empty()) {
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, trc->getName());
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
+
+ // Render interval numbers if necessary...
+ if (!ro.intervals().empty()) {
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = *liItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, li->reg);
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ os << indent + s(2) << "</tr>\n";
+
+ // End header row, start with the data rows...
+
+ MachineInstr *mi = 0;
+
+ // Data rows:
+ for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
+ i = i.getNextSlot()) {
+
+ // Render the slot column.
+ os << indent + s(2) << "<tr height=6ex>\n";
+
+ // Render the code column.
+ if (i.isLoad()) {
+ MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
+ mi = sis->getInstructionFromIndex(i);
+
+ if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
+ ro.renderEmptyIndexes()) {
+ os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
+ << indent + s(4) << "<td rowspan=4>\n";
+
+ if (i == sis->getMBBStartIdx(mbb)) {
+ os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
+ } else if (mi != 0) {
+ os << indent + s(6) << "&nbsp;&nbsp;";
+ renderMachineInstr(os, mi);
+ } else {
+ // Empty interval - leave blank.
+ }
+ os << indent + s(4) << "</td>\n";
+ } else {
+ i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ continue;
+ }
+ }
+
+ // Render the class columns.
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "<head>\n"
+ << s(2) << "<body >\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ return false;
+ }
+
+ void RenderMachineFunction::releaseMemory() {
+ trei.clear();
+ ro.clear();
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+ }
+
+ void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ const MachineInstr *mi = &*rItr;
+ if (mi->readsRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ }
+ if (mi->definesRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ }
+ }
+ }
+
+ void RenderMachineFunction::rememberSpills(
+ const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
+ siEnd = spills.end();
+ siItr != siEnd; ++siItr) {
+ const LiveInterval *spill = *siItr;
+ spillIntervals[li].insert(spill);
+ spillFor[spill] = li;
+ }
+ }
+
+ bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
+ SpillForMap::const_iterator sfItr = spillFor.find(li);
+ if (sfItr == spillFor.end())
+ return false;
+ return true;
+ }
+
+ void RenderMachineFunction::renderMachineFunction(
+ const char *renderContextStr,
+ const VirtRegMap *vrm,
+ const char *renderSuffix) {
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ this->vrm = vrm;
+ trei.reset();
+
+ std::string rpFileName(mf->getFunction()->getName().str() +
+ (renderSuffix ? renderSuffix : "") +
+ outputFileSuffix);
+
+ std::string errMsg;
+ raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
+
+ renderFunctionPage(outFile, renderContextStr);
+
+ ro.resetRenderSpecificOptions();
+ }
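+ // Usage sketch (illustrative): an allocator that wants one page per stage
+ // might call, for example,
+ //
+ // rmf->renderMachineFunction("Pre register allocation", 0, ".preRA");
+ // rmf->renderMachineFunction("Post register allocation", vrm, ".postRA");
+ //
+ // The context strings and suffixes here are examples only.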
+
+ std::string RenderMachineFunction::escapeChars(const std::string &s) const {
+ return escapeChars(s.begin(), s.end());
+ }
+
+}
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
new file mode 100644
index 000000000000..85719923c0c6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
@@ -0,0 +1,338 @@
+//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineRegisterInfo;
+ class RenderMachineFunction;
+ class TargetRegisterClass;
+ class TargetRegisterInfo;
+ class VirtRegMap;
+ class raw_ostream;
+
+ /// \brief Helper class to process rendering options. Tries to be as lazy as
+ /// possible.
+ class MFRenderingOptions {
+ public:
+
+ struct RegClassComp {
+ bool operator()(const TargetRegisterClass *trc1,
+ const TargetRegisterClass *trc2) const {
+ std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
+ return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
+ trc2Name.begin(), trc2Name.end());
+ }
+ };
+
+ typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
+
+ struct IntervalComp {
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
+
+ /// Initialise the rendering options.
+ void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
+ LiveIntervals *lis, const RenderMachineFunction *rmf);
+
+ /// Clear translations of options to the current function.
+ void clear();
+
+ /// Reset any options computed for this specific rendering.
+ void resetRenderSpecificOptions();
+
+ /// Should we render the current function.
+ bool shouldRenderCurrentMachineFunction() const;
+
+ /// Return the set of register classes to render pressure for.
+ const RegClassSet& regClasses() const;
+
+ /// Return the set of live intervals to render liveness for.
+ const IntervalSet& intervals() const;
+
+ /// Render indexes which are not associated with instructions / MBB starts.
+ bool renderEmptyIndexes() const;
+
+ /// Return whether or not to render using SVG for fancy vertical text.
+ bool fancyVerticals() const;
+
+ private:
+
+ static bool renderingOptionsProcessed;
+ static std::set<std::string> mfNamesToRender;
+ static bool renderAllMFs;
+
+ static std::set<std::string> classNamesToRender;
+ static bool renderAllClasses;
+
+
+ static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
+ typedef enum { ExplicitOnly = 0,
+ AllPhys = 1,
+ VirtNoSpills = 2,
+ VirtSpills = 4,
+ AllVirt = 6,
+ All = 7 }
+ IntervalTypesToRender;
+ static unsigned intervalTypesToRender;
+
+ template <typename OutputItr>
+ static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
+
+ static void processOptions();
+
+ static void processFuncNames();
+ static void processRegClassNames();
+ static void processIntervalNumbers();
+
+ static void processIntervalRange(const std::string &intervalRangeStr);
+
+ MachineFunction *mf;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ const RenderMachineFunction *rmf;
+
+ mutable bool regClassesTranslatedToCurrentFunction;
+ mutable RegClassSet regClassSet;
+
+ mutable bool intervalsTranslatedToCurrentFunction;
+ mutable IntervalSet intervalSet;
+
+ void translateRegClassNamesToCurrentFunction() const;
+
+ void translateIntervalNumbersToCurrentFunction() const;
+ };
+
+ /// \brief Provide extra information about the physical and virtual registers
+ /// in the function being compiled.
+ class TargetRegisterExtraInfo {
+ public:
+ TargetRegisterExtraInfo();
+
+ /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
+ /// sources of information.
+ void setup(MachineFunction *mf, MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri, LiveIntervals *lis);
+
+ /// \brief Recompute tables for changed function.
+ void reset();
+
+ /// \brief Free all tables in TargetRegisterExtraInfo.
+ void clear();
+
+ /// \brief Maximum number of registers from trc which alias reg.
+ unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
+
+ /// \brief Returns the number of allocable registers in trc.
+ unsigned getCapacity(const TargetRegisterClass *trc) const;
+
+ /// \brief Return the number of registers of class trc that may be
+ /// needed at slot i.
+ unsigned getPressureAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ /// \brief Return true if the number of registers of type trc that may be
+ /// needed at slot i is greater than the capacity of trc.
+ bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ private:
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
+ typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
+ VRWorstMap vrWorst;
+
+ typedef std::map<unsigned, WorstMapLine> PRWorstMap;
+ PRWorstMap prWorst;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
+ CapacityMap capacityMap;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
+ typedef std::map<SlotIndex, PressureMapLine> PressureMap;
+ PressureMap pressureMap;
+
+ bool mapsPopulated;
+
+ /// \brief Initialise the 'worst' table.
+ void initWorst();
+
+ /// \brief Initialise the 'capacity' table.
+ void initCapacity();
+
+ /// \brief Initialise/Reset the 'pressure' and live states tables.
+ void resetPressureAndLiveStates();
+ };
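
The interface above reduces to two tables: a per-class capacity and a per-slot, per-class pressure count, with classOverCapacityAtSlot() being a single comparison between the two. A minimal standalone sketch of that shape, using plain std::map with string-keyed register classes purely for illustration (none of these names are the LLVM API):

#include <map>
#include <string>

// Hypothetical, simplified stand-in for the tables kept by TargetRegisterExtraInfo:
// capacity per register class, pressure per (slot, register class).
struct PressureTables {
  std::map<std::string, unsigned> capacity;                       // class -> allocatable regs
  std::map<unsigned, std::map<std::string, unsigned> > pressure;  // slot  -> class -> pressure

  bool classOverCapacityAtSlot(const std::string &trc, unsigned slot) const {
    std::map<unsigned, std::map<std::string, unsigned> >::const_iterator
      s = pressure.find(slot);
    if (s == pressure.end())
      return false;
    std::map<std::string, unsigned>::const_iterator p = s->second.find(trc);
    std::map<std::string, unsigned>::const_iterator c = capacity.find(trc);
    if (p == s->second.end() || c == capacity.end())
      return false;
    return p->second > c->second;                                 // over capacity?
  }
};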
+
+ /// \brief Render MachineFunction objects and related information to a HTML
+ /// page.
+ class RenderMachineFunction : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ RenderMachineFunction() : MachineFunctionPass(ID) {
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+ void rememberUseDefs(const LiveInterval *li);
+
+ void rememberSpills(const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills);
+
+ bool isSpill(const LiveInterval *li) const;
+
+ /// \brief Render this machine function to HTML.
+ ///
+ /// @param renderContextStr This parameter will be included at the top of
+ /// the HTML file to explain where (in the
+ /// codegen pipeline) this function was rendered
+ /// from. Set it to something like
+ /// "Pre-register-allocation".
+ /// @param vrm If non-null the VRM will be queried to determine
+ /// whether a virtual register was allocated to a
+ /// physical register or spilled.
+ /// @param renderSuffix This string will be appended to the function
+ /// name (before the output file suffix) to enable
+ /// multiple renderings from the same function.
+ void renderMachineFunction(const char *renderContextStr,
+ const VirtRegMap *vrm = 0,
+ const char *renderSuffix = 0);
+
+ private:
+ class Spacer;
+ friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
+
+ std::string fqn;
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ SlotIndexes *sis;
+ const VirtRegMap *vrm;
+
+ TargetRegisterExtraInfo trei;
+ MFRenderingOptions ro;
+
+
+
+ // Utilities.
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
+ LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
+
+ typedef enum { Zero, Low, High } PressureState;
+ PressureState getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
+ SpillIntervals;
+ SpillIntervals spillIntervals;
+
+ typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
+ SpillForMap spillFor;
+
+ typedef std::set<SlotIndex> SlotSet;
+ typedef std::map<const LiveInterval*, SlotSet> UseDefs;
+ UseDefs useDefs;
+
+ // ---------- Rendering methods ----------
+
+ /// For inserting spaces when pretty printing.
+ class Spacer {
+ public:
+ explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
+ Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
+ void print(raw_ostream &os) const;
+ private:
+ unsigned ns;
+ };
+
+ Spacer s(unsigned ns) const;
+
+ template <typename Iterator>
+ std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
+
+ /// \brief Render a machine instruction.
+ void renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const;
+
+ /// \brief Render vertical text.
+ template <typename T>
+ void renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const;
+
+ /// \brief Insert CSS layout info.
+ void insertCSS(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a brief summary of the function (including rendering
+ /// context).
+ void renderFunctionSummary(const Spacer &indent,
+ raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ /// \brief Render a legend for the pressure table.
+ void renderPressureTableLegend(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a consecutive set of HTML cells of the same class using
+ /// the colspan attribute for run-length encoding.
+ template <typename CellType>
+ void renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const;
+
+ /// \brief Render code listing, potentially with register pressure
+ /// and live intervals shown alongside.
+ void renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render the HTML page representing the MachineFunction.
+ void renderFunctionPage(raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ std::string escapeChars(const std::string &s) const;
+ };
+}
+
+#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..21375b286c99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,607 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+#ifndef NDEBUG
+cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ MF(mf), MRI(mf.getRegInfo()),
+ EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ BB = bb;
+ InsertPos = insertPos;
+
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+
+ Schedule();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+bool SUnit::addPred(const SDep &D) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D)
+ return false;
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
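
addPred() and its counterpart removePred() below keep Preds and Succs as mirror images: every SDep stored in a node's Preds has a twin, with the SUnit pointer swapped, in the predecessor's Succs, and the Data-edge counters are bumped on both sides. A minimal sketch of that invariant with deliberately simplified node and edge types (not the SUnit/SDep classes):

#include <cstddef>
#include <vector>

// Simplified sketch: mirrored edge lists, as maintained by addPred/removePred.
struct Node {
  std::vector<Node*> Preds, Succs;

  bool addPred(Node *P) {
    for (std::size_t i = 0; i < Preds.size(); ++i)
      if (Preds[i] == P)
        return false;           // edge already present; keep the lists redundancy-free
    Preds.push_back(P);         // edge P -> this, as seen from this node
    P->Succs.push_back(this);   // the same edge, as seen from the predecessor
    return true;
  }
};

// After a.addPred(&b), a.Preds contains &b and b.Succs contains &a.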
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ bool FoundSucc = false;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+ EE = N->Succs.end(); II != EE; ++II)
+ if (*II == P) {
+ FoundSucc = true;
+ N->Succs.erase(II);
+ break;
+ }
+ assert(FoundSucc && "Mismatching preds / succs lists!");
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ if (!isScheduled) {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
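
Both routines above replace the natural recursion with an explicit worklist: a node stays on the stack until every neighbour on the relevant side has a current value, and only then is its own longest-path value fixed. A standalone sketch of the same pattern for depth (longest distance from any root), assuming plain predecessor lists and unit edge latencies:

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch only: depth[] must be initialised to -1 ("not yet computed") by the caller.
static int longestPathFromRoots(const std::vector<std::vector<int> > &preds,
                                std::vector<int> &depth, int start) {
  std::vector<int> work(1, start);
  while (!work.empty()) {
    int cur = work.back();
    bool done = true;
    int best = 0;
    for (std::size_t i = 0; i < preds[cur].size(); ++i) {
      int p = preds[cur][i];
      if (depth[p] >= 0)
        best = std::max(best, depth[p] + 1);   // unit latency per edge
      else {
        done = false;
        work.push_back(p);                     // compute the predecessor first
      }
    }
    if (done) {                                // all preds known: fix this node
      work.pop_back();
      depth[cur] = best;
    }
  }
  return depth[start];
}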
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << Depth << "\n";
+ dbgs() << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << G->TRI->getName(I->getReg());
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks the nodes
+/// reachable from Y by calling DFS, and then uses Shift to move them so that
+/// they lie immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (!--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
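
InitDAGTopologicalSorting() is a Kahn-style pass run from the leaves: Node2Index temporarily holds each node's count of unprocessed successors, and indices are handed out downward from DAGSize - 1, so every edge ends up going from a smaller index to a larger one. A minimal sketch of the same numbering on plain successor/predecessor lists (illustrative only, not SUnits):

#include <cassert>
#include <cstddef>
#include <vector>

// Number a DAG so that order[u] < order[v] for every edge u -> v, working
// backwards from the leaves exactly as InitDAGTopologicalSorting does.
static std::vector<int> topoNumber(const std::vector<std::vector<int> > &succs,
                                   const std::vector<std::vector<int> > &preds) {
  const int n = (int)succs.size();
  std::vector<int> order(n), degree(n);
  std::vector<int> work;
  for (int u = 0; u < n; ++u) {
    degree[u] = (int)succs[u].size();       // unprocessed successor count
    if (degree[u] == 0)
      work.push_back(u);                    // leaves get the largest indices
  }
  int id = n;
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    order[u] = --id;
    for (std::size_t i = 0; i < preds[u].size(); ++i)
      if (--degree[preds[u][i]] == 0)       // all successors of the pred are done
        work.push_back(preds[u][i]);
  }
  assert(id == 0 && "graph was not acyclic");
  return order;
}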
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate the removal
+/// of the specified node N from the predecessors of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ int s = SU->Succs[I].getSUnit()->NodeNum;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already visited and within the affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
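
Shift() compacts the untouched nodes in [LowerBound, UpperBound] toward the lower end and re-appends the visited (reachable-from-Y) nodes immediately after them, keeping the relative order inside each group. A small sketch of that reindexing with plain vectors, assuming the caller passes the visited set as a bool-per-node vector:

#include <cstddef>
#include <vector>

// Sketch of the reindexing performed by Shift(): nodes flagged in `moved` end up
// immediately after the unflagged ones within [lb, ub].
static void shiftRange(std::vector<int> &indexToNode, std::vector<int> &nodeToIndex,
                       const std::vector<bool> &moved, int lb, int ub) {
  std::vector<int> deferred;
  int write = lb;
  for (int i = lb; i <= ub; ++i) {
    int node = indexToNode[i];
    if (moved[node])
      deferred.push_back(node);      // reinsert after the untouched nodes
    else {
      nodeToIndex[node] = write;
      indexToNode[write] = node;
      ++write;
    }
  }
  for (std::size_t j = 0; j < deferred.size(); ++j, ++write) {
    nodeToIndex[deferred[j]] = write;
    indexToNode[write] = deferred[j];
  }
}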
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ if (IsReachable(TargetSU, SU))
+ return true;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(TargetSU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
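
IsReachable() leans on the maintained order: a path from TargetSU to SU can exist only if Ord(TargetSU) < Ord(SU), and no intermediate node on such a path can have an index at or beyond Ord(SU), which is what lets DFS() both detect the would-be cycle and prune aggressively. A standalone sketch of the same bounded search on plain successor lists:

#include <cstddef>
#include <vector>

// Is `to` reachable from `from`, given a numbering `order` in which every edge
// goes from a smaller to a larger index?  Nodes whose index is already at or
// past order[to] cannot lead anywhere useful, so they are pruned.
static bool reachable(const std::vector<std::vector<int> > &succs,
                      const std::vector<int> &order, int from, int to) {
  if (order[from] >= order[to])
    return from == to;                        // the order forbids any other path
  std::vector<bool> seen(succs.size(), false);
  std::vector<int> work(1, from);
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    seen[u] = true;
    for (std::size_t i = 0; i < succs[u].size(); ++i) {
      int v = succs[u][i];
      if (v == to)
        return true;
      if (!seen[v] && order[v] < order[to])   // prune using the topological order
        work.push_back(v);
    }
  }
  return false;
}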
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 000000000000..f8b1bc76eb8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,68 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+ DenseMap<SUnit*, unsigned> &VRBaseMap) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..446adfc2b626
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,697 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Operator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+ InstrItins(mf.getTarget().getInstrItineraryData()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+ LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ DbgValues.clear();
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ Begin = begin;
+ InsertPosIndex = endcount;
+
+ ScheduleDAG::Run(bb, end);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant or a multiplied value, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+ // First just call Value::getUnderlyingObject to let it do what it does.
+ do {
+ V = GetUnderlyingObject(V);
+ // If it found an inttoptr, use special code to continue climbing.
+ if (Operator::getOpcode(V) != Instruction::IntToPtr)
+ break;
+ const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ // If that succeeded in finding a pointer, continue the search.
+ if (!O->getType()->isPointerTy())
+ break;
+ V = O;
+ } while (1);
+ return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ bool &MayAlias) {
+ MayAlias = true;
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return 0;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return 0;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
+ return V;
+ }
+
+ if (isIdentifiedObject(V))
+ return V;
+
+ return 0;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+}
+
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and uses lists. This is because we want
+/// to make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is especially
+/// important when the definition latency of the return value(s) is too high
+/// to be hidden by the branch or when the liveout registers are used by
+/// instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+ MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // the instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ Uses[Reg].push_back(&ExitSU);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ SmallSet<unsigned, 8> Seen;
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (Seen.insert(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ }
+ }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+ // We'll be allocating one SUnit for each instruction, plus one for
+ // the region exit node.
+ SUnits.reserve(BB->size());
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
+
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ AddSchedBarrierDeps();
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
+ }
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *PrevMI = NULL;
+ for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ if (MI && PrevMI) {
+ DbgValues.push_back(std::make_pair(PrevMI, MI));
+ PrevMI = NULL;
+ }
+
+ if (MI->isDebugValue()) {
+ PrevMI = MI;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ assert(!MCID.isTerminator() && !MI->isLabel() &&
+ "Cannot schedule terminators or labels!");
+ // Create the SUnit for this MI.
+ SUnit *SU = NewSUnit(MI);
+ SU->isCall = MCID.isCall();
+ SU->isCommutable = MCID.isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
+
+ // Add register-based dependencies (data, anti, and output).
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+
+ std::vector<SUnit *> &UseList = Uses[Reg];
+ // Defs are pushed in the order they are visited and never reordered.
+ std::vector<SUnit *> &DefList = Defs[Reg];
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &MemDefList = Defs[*Alias];
+ for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
+ SUnit *DefSU = MemDefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias)))
+ DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
+ }
+ }
+
+ if (MO.isDef()) {
+ // Add any data dependencies.
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if (RegUseIndex >= 0 &&
+ (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ }
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseMCID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ UseList.clear();
+ if (!MO.isDead())
+ DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
+ DefList.push_back(SU);
+ } else {
+ UseList.push_back(SU);
+ }
+ }
+
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass)
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+#define STORE_LOAD_LATENCY 1
+ unsigned TrueMemOrderLatency = 0;
+ if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references through SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (MCID.mayStore()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = STORE_LOAD_LATENCY;
+ if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ I->second = SU;
+ } else {
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
+ /*Reg=*/0, /*isNormalMemory=*/true));
+ J->second.clear();
+ }
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ } else {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ if (!ExitSU.isPred(SU))
+ // Push stores up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Order, 0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (MCID.mayLoad()) {
+ bool MayAlias = true;
+ TrueMemOrderLatency = 0;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ }
+ }
+ if (PrevMI)
+ FirstDbgValue = PrevMI;
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ Defs[i].clear();
+ Uses[i].clear();
+ }
+ PendingLoads.clear();
+}
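
For the memory edges, the walk above effectively sorts each instruction into one of three buckets: a full barrier (calls, unmodeled side effects, non-invariant volatile references) that starts a new chain, an access with a known underlying object that gets precise per-object edges, and an unknown access that is hung off the generic alias chain. A small sketch of just that classification, with assumed, simplified predicates standing in for the MachineInstr/MCInstrDesc queries:

// Hypothetical flags summarising the queries made in BuildSchedGraph; this is an
// illustration of the decision structure, not the LLVM API.
struct MemInfo {
  bool isCall, hasSideEffects, isVolatileRef;
  bool mayStore, mayLoad, isInvariantLoad;
  bool hasKnownUnderlyingObject;
};

enum MemDepKind { FullBarrier, PreciseObject, UnknownAlias, NotMemory };

static MemDepKind classify(const MemInfo &mi) {
  if (mi.isCall || mi.hasSideEffects ||
      (mi.isVolatileRef && !(mi.mayLoad && mi.isInvariantLoad)))
    return FullBarrier;                    // depend on everything; start new chains
  if (mi.mayStore)
    return mi.hasKnownUnderlyingObject ? PreciseObject : UnknownAlias;
  if (mi.mayLoad)
    return mi.isInvariantLoad ? NotMemory  // invariant load: no chain edges needed
         : mi.hasKnownUnderlyingObject ? PreciseObject
                                       : UnknownAlias;
  return NotMemory;
}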
+
+void ScheduleDAGInstrs::FinishBlock() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+ // Compute the latency for the node.
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
+
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
+ if (SU->getInstr()->getDesc().mayLoad())
+ SU->Latency += 2;
+ } else {
+ SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+ }
+}
+
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const {
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+
+ // For a data dependency with a known register...
+ if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+ return;
+
+ const unsigned Reg = dep.getReg();
+
+ // ... find the definition of the register in the defining
+ // instruction
+ MachineInstr *DefMI = Def->getInstr();
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ }
+ MachineInstr *UseMI = Use->getInstr();
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ if (UseMI) {
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+ UseMI, i);
+ Latency = std::max(Latency, UseCycle);
+ }
+ } else {
+ // If UseMI is null, then it must be a scheduling barrier.
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
+ }
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss);
+ return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+ // For MachineInstr-based scheduling, we're rescheduling the instructions in
+ // the block, so start by removing them from the block.
+ while (Begin != InsertPos) {
+ MachineBasicBlock::iterator I = Begin;
+ ++Begin;
+ BB->remove(I);
+ }
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->insert(InsertPos, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->insert(InsertPos, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ EmitNoop();
+ }
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (!Sequence.empty())
+ Begin = Sequence[0]->getInstr();
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineInstr *OrigPrivMI = P.second;
+ BB->insertAfter(OrigPrivMI, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+ return BB;
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 000000000000..8a4ea855235c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,211 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
+ class LLVM_LIBRARY_VISIBILITY LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ ++Count; // Not every iteration due to dbg_value above.
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+ const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
+
+ /// Defs, Uses - Remember where defs and uses of each physical register
+ /// are as we iterate upward through the instructions. This is allocated
+ /// here instead of inside BuildSchedGraph to avoid the need for it to be
+ /// initialized and destructed for each block.
+ std::vector<std::vector<SUnit *> > Defs;
+ std::vector<std::vector<SUnit *> > Uses;
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+ /// back-edge-aware scheduling.
+ ///
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+
+ protected:
+
+ /// DbgValues - Remember the instruction that precedes each DBG_VALUE.
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+ DbgValueVector DbgValues;
+ MachineInstr *FirstDbgValue;
+
+ public:
+ MachineBasicBlock::iterator Begin; // The beginning of the range to
+ // be scheduled. The range extends
+ // to InsertPos.
+ unsigned InsertPosIndex; // The index in BB of InsertPos.
+
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Run - perform scheduling.
+ ///
+ void Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endindex);
+
+ /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that is our
+ /// input.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+ /// list of instructions being scheduled to the scheduling barrier. We want
+ /// to make sure instructions which define registers that are either used by
+ /// the terminator or are live-out are properly scheduled. This is especially
+ /// important when the definition latency of the return value(s) is too high
+ /// to be hidden by the branch or when the liveout registers are used by
+ /// instructions in the fallthrough block.
+ void AddSchedBarrierDeps();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const;
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// StartBlock - Prepare to perform scheduling in the given block.
+ ///
+ virtual void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// FinishBlock - Clean up after scheduling in the given block.
+ ///
+ virtual void FinishBlock();
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ };
+}
+
+#endif
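
The NDEBUG guard in NewSUnit above exists because SUnit pointers are handed out while SUnits keeps growing, which is only safe if the vector never reallocates. A minimal standalone C++ sketch of that pattern (not taken from this patch; NodePool and its names are made up for illustration):

    #include <cassert>
    #include <vector>

    struct Node { unsigned Id; };

    class NodePool {
      std::vector<Node> Nodes;   // reserve()d up front so it never reallocates
    public:
      explicit NodePool(unsigned Max) { Nodes.reserve(Max); }

      // Hands out a pointer into Nodes; this is only safe because the debug
      // assertion below checks that push_back did not reallocate the vector.
      Node *newNode() {
    #ifndef NDEBUG
        const Node *Addr = Nodes.empty() ? 0 : &Nodes[0];
    #endif
        Nodes.push_back(Node());
        Nodes.back().Id = (unsigned)Nodes.size() - 1;
        assert((Addr == 0 || Addr == &Nodes[0]) &&
               "Node vector reallocated on the fly!");
        return &Nodes.back();
      }
    };

    int main() {
      NodePool Pool(16);
      Node *A = Pool.newNode();
      Node *B = Pool.newNode();
      assert(A->Id == 0 && B->Id == 1);
      (void)A; (void)B;
      return 0;
    }
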
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..4b55a2284f85
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,99 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
+ ":" + BB->getBasicBlock()->getNameStr());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
+ "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
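
The DOTGraphTraits specialization above only supplies callbacks; llvm::GraphWriter turns them into DOT text. A rough standalone sketch of what the edge-attribute callbacks amount to in the emitted DOT (plain C++, no LLVM types; the DepKind enum is invented for the example):

    #include <cstdio>

    // Stand-in for an SUnit dependence kind: data, control or artificial.
    enum DepKind { DataDep, CtrlDep, ArtificialDep };

    static const char *edgeAttrs(DepKind K) {
      if (K == ArtificialDep) return "color=cyan,style=dashed";
      if (K == CtrlDep)       return "color=blue,style=dashed";
      return "";                      // data dependences keep the default style
    }

    int main() {
      struct Edge { int From, To; DepKind Kind; };
      const Edge Edges[] = { {0, 1, DataDep}, {1, 2, CtrlDep}, {0, 2, ArtificialDep} };

      std::printf("digraph \"Scheduling-Units Graph\" {\n");
      std::printf("  node [shape=Mrecord];\n");
      for (unsigned i = 0; i < sizeof(Edges) / sizeof(Edges[0]); ++i)
        std::printf("  SU%d -> SU%d [%s];\n",
                    Edges[i].From, Edges[i].To, edgeAttrs(Edges[i].Kind));
      std::printf("}\n");
      return 0;
    }
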
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..0e005d35189d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,243 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
+ // Determine the maximum depth of any itinerary. This determines the
+ // depth of the scoreboard. We always make the scoreboard at least 1
+ // cycle deep to avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ IssueWidth = ItinData->IssueWidth;
+
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ }
+ }
+ MaxLookAhead = ScoreboardDepth;
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+}
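
The constructor above computes each itinerary's depth as the furthest cycle any stage occupies, then rounds the running scoreboard depth up to a power of two. A standalone sketch of just that calculation, with made-up stage data:

    #include <cstdio>
    #include <vector>

    struct Stage { unsigned Cycles, NextCycles; };  // occupancy / issue advance

    // Depth of one itinerary: the furthest cycle any of its stages occupies.
    static unsigned itinDepth(const std::vector<Stage> &Itin) {
      unsigned CurCycle = 0, Depth = 0;
      for (size_t i = 0; i != Itin.size(); ++i) {
        unsigned StageDepth = CurCycle + Itin[i].Cycles;
        if (Depth < StageDepth) Depth = StageDepth;
        CurCycle += Itin[i].NextCycles;
      }
      return Depth;
    }

    int main() {
      std::vector<Stage> Itin;
      Stage S1 = {2, 1}, S2 = {3, 0};
      Itin.push_back(S1); Itin.push_back(S2);

      unsigned ScoreboardDepth = 1;        // always at least one cycle deep
      unsigned D = itinDepth(Itin);        // here: max(2, 1 + 3) = 4
      while (D > ScoreboardDepth)
        ScoreboardDepth *= 2;              // round up to the next power of two

      std::printf("itinerary depth = %u, scoreboard depth = %u\n",
                  D, ScoreboardDepth);
      return 0;
    }
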
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+    // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+      // Reduce freeUnits to a single unit (its highest set bit).
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ assert(freeUnit && "No function unit available!");
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
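
The "reduce to a single unit" loop in EmitInstruction repeatedly clears the lowest set bit (x & (x - 1)), so the value left behind is the highest set bit of the original free-unit mask. A standalone check of that behaviour (assuming, as dump() above does, that unit masks fit in 32 bits):

    #include <cassert>
    #include <cstdint>

    // Strips the lowest set bit until only one remains, i.e. returns the
    // highest set bit of Mask. Mask must be non-zero.
    static uint32_t pickOneUnit(uint32_t Mask) {
      uint32_t Unit = 0, Rest = Mask;
      do {
        Unit = Rest;
        Rest = Unit & (Unit - 1);   // clear the lowest set bit
      } while (Rest);
      return Unit;
    }

    int main() {
      assert(pickOneUnit(0x01) == 0x01);
      assert(pickOneUnit(0x06) == 0x04);   // 110      -> 100
      assert(pickOneUnit(0xB0) == 0x80);   // 10110000 -> 10000000
      return 0;
    }
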
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
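
AdvanceCycle and RecedeCycle clear one slot and then shift the scoreboard window by a cycle in opposite directions, which suggests the Scoreboard type (declared in ScoreboardHazardRecognizer.h, not shown in this hunk) behaves like a fixed-depth ring buffer indexed from the current cycle. A hedged standalone model of that assumed behaviour, with invented names:

    #include <cassert>
    #include <vector>

    // Assumed ring-buffer model of the Scoreboard: index 0 is the current
    // cycle, advance() moves the window one cycle forward, recede() one back.
    class RingScoreboard {
      std::vector<unsigned> Data;
      unsigned Head;
    public:
      explicit RingScoreboard(unsigned Depth) : Data(Depth, 0), Head(0) {}
      unsigned getDepth() const { return (unsigned)Data.size(); }
      unsigned &operator[](unsigned i) { return Data[(Head + i) % Data.size()]; }
      void advance() { Head = (Head + 1) % Data.size(); }
      void recede()  { Head = (Head + (unsigned)Data.size() - 1) % Data.size(); }
    };

    int main() {
      RingScoreboard SB(4);
      SB[1] = 0xF;                  // reserve units one cycle in the future
      SB[0] = 0; SB.advance();      // AdvanceCycle: recycle the slot just left
      assert(SB[0] == 0xF);         // the future reservation is now "current"
      SB[SB.getDepth() - 1] = 0; SB.recede();   // RecedeCycle mirrors this
      assert(SB[1] == 0xF);
      return 0;
    }
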
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 000000000000..4f0d2caca22b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,7976 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work list because they might now be simplified
+    /// further.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+    /// looking for a better chain (aliasing node).
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ }
+
+ /// isTypeLegal - This method returns true if we are running before type
+ /// legalization or if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+ };
+}
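
AddToWorkList above keeps at most one copy of a node on the work list: it first erases any existing instances with the erase/remove idiom, then pushes the node to the back, where it becomes the next one popped. The same discipline with plain integers (names invented for the sketch):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    static void removeFromWorkList(std::vector<int> &WL, int N) {
      WL.erase(std::remove(WL.begin(), WL.end(), N), WL.end());
    }

    static void addToWorkList(std::vector<int> &WL, int N) {
      removeFromWorkList(WL, N);   // keep a single instance...
      WL.push_back(N);             // ...and make it the next one popped
    }

    int main() {
      std::vector<int> WL;
      addToWorkList(WL, 1);
      addToWorkList(WL, 2);
      addToWorkList(WL, 1);        // re-adding moves 1 behind 2
      assert(WL.size() == 2 && WL.front() == 2 && WL.back() == 1);
      return 0;
    }
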
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, 2 if we
+/// can compute the negated form more cheaply than the expression itself, or
+/// 0 if it cannot be negated for free.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ unsigned Depth = 0) {
+ // No compile time optimizations on this type.
+ if (Op.getValueType() == MVT::ppcf128)
+ return 0;
+
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+  default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
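
isNegatibleForFree and GetNegatedExpression above push an fneg into a cheaper position using the identities -(a+b) = (-a)-b, -(a-b) = b-a and -(a*b) = (-a)*b = a*(-b), which is why they are guarded by the UnsafeFPMath / sign-dependent-rounding checks (signed zeros and rounding modes can break them in general). A quick standalone spot check on values where the identities hold exactly (not a proof):

    #include <cassert>

    int main() {
      // These particular values are exactly representable, so the identities
      // hold bit-for-bit; in general they need the unsafe-FP-math guards above.
      double a = 3.5, b = -1.25;
      assert(-(a + b) == (-a) - b);   // fneg(fadd A, B) -> fsub(fneg A), B
      assert(-(a - b) == b - a);      // fneg(fsub A, B) -> fsub B, A
      assert(-(a * b) == (-a) * b);   // fneg(fmul X, Y) -> fmul(fneg X), Y
      assert(-(a * b) == a * (-b));   // ...or fmul X, (fneg Y)
      assert(-(a / b) == (-a) / b);   // and likewise for fdiv
      return 0;
    }
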
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
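
ReassociateOps above rewrites (op (op x, c1), c2) into (op x, (op c1, c2)) so the two constants can be folded into one. A standalone integer illustration that the rewrite is value-preserving for the associative, commutative ops the combiner handles (values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      const uint32_t c1 = 12, c2 = 30;

      // reassoc. (add (add x, c1), c2) -> (add x, (c1 + c2))
      assert((x + c1) + c2 == x + (c1 + c2));

      // The same holds for the other associative/commutative integer ops,
      // e.g. AND and OR, letting the two constants fold into one.
      assert(((x & 0xFF00FF00u) & 0x0F0F0F0Fu) == (x & (0xFF00FF00u & 0x0F0F0F0Fu)));
      assert(((x | 0x1u) | 0x8000u) == (x | (0x1u | 0x8000u)));
      return 0;
    }
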
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ DebugLoc dl = Load->getDebugLoc();
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
+ &DeadNodes);
+ removeFromWorkList(Load);
+ DAG.DeleteNode(Load);
+ AddToWorkList(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ DebugLoc dl = Op.getDebugLoc();
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (NewOp.getNode() == 0)
+ return SDValue();
+ AddToWorkList(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// PromoteIntBinOp - Promote the specified integer binary operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (NN0.getNode() == 0)
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
+
+ AddToWorkList(NN0.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
+
+/// PromoteIntShiftOp - Promote the specified integer shift operation if the
+/// target indicates it is beneficial. e.g. On x86, it's usually better to
+/// promote i16 operations to i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (N0.getNode() == 0)
+ return SDValue();
+
+ AddToWorkList(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ AddToWorkList(Result.getNode());
+ return true;
+ }
+ return false;
+}
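
The Promote* routines above rely on the fact that, for the wrap-around integer ops promoted here (add, sub, mul, and, or, xor, shl), computing in a wider type and truncating yields the same low bits as computing in the narrow type, so an 'undesirable' i16 op can be carried out in i32. A standalone spot check of that arithmetic fact (illustrative values only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t a = 0xFFF0, b = 0x1234;

      // Widen to 32 bits, operate, truncate back: same low 16 bits as the
      // narrow operation, so the op can be carried out in the wider type.
      assert((uint16_t)((uint32_t)a + (uint32_t)b) == (uint16_t)(a + b));
      assert((uint16_t)((uint32_t)a * (uint32_t)b) == (uint16_t)(a * b));
      assert((uint16_t)((uint32_t)a ^ (uint32_t)b) == (uint16_t)(a ^ b));
      assert((uint16_t)((uint32_t)a << 3)          == (uint16_t)(a << 3));

      // Right shifts look at the high bits, which is why the code above uses
      // SExtPromoteOperand for SRA and ZExtPromoteOperand for SRL rather than
      // an any-extend.
      return 0;
    }
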
+
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables so the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= NoIllegalOperations;
+ LegalTypes = Level >= NoIllegalTypes;
+
+ // Add all the dag nodes to the worklist.
+ WorkList.reserve(DAG.allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+  // While the worklist isn't empty, inspect the node at the end of it and
+  // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+    // Add the operands of the old node to the worklist in case this node was
+    // the last one that used them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+  // If still nothing happened, try promoting the operation.
+ if (RV.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
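
The last block of combine() above swaps the operands of a commutative node when the constant sits on the left, because constants are canonicalized to the RHS and the swapped form may already exist in the DAG and be CSE'd. A standalone sketch of the idea using canonicalized string keys instead of SelectionDAG nodes (everything here is invented for illustration):

    #include <cassert>
    #include <set>
    #include <string>
    #include <utility>

    // Canonical key for a commutative node: if the left operand looks like a
    // constant (digits only, for this toy), swap it onto the right-hand side.
    static std::string key(const std::string &Op, std::string L, std::string R) {
      bool LIsConst =
          !L.empty() && L.find_first_not_of("0123456789") == std::string::npos;
      if (LIsConst) std::swap(L, R);
      return Op + "(" + L + "," + R + ")";
    }

    int main() {
      std::set<std::string> CSE;
      CSE.insert(key("add", "x", "5"));           // existing node: add(x, 5)
      // A later add(5, x) canonicalizes to the same key, so it can be CSE'd.
      assert(CSE.count(key("add", "5", "x")) == 1);
      return 0;
    }
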
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+  // Iterate through the token factors. The TFs list grows as new token
+  // factors are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+        // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall thru
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+  // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
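
visitTokenFactor above flattens nested single-use token factors into one operand list, dropping entry tokens and deduplicating via SeenOps. A standalone sketch of the same worklist-style flattening over a tiny node tree (the hasOneUse check is omitted; the Node type is made up):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node {
      bool IsTokenFactor, IsEntry;
      std::vector<Node*> Ops;
    };

    // Flatten nested token factors into one operand list, skipping entry
    // tokens and duplicate operands (mirrors the TFs/SeenOps/Ops loop above).
    static std::vector<Node*> flatten(Node *Root) {
      std::vector<Node*> TFs(1, Root), Ops;
      std::set<Node*> Seen;
      for (size_t i = 0; i != TFs.size(); ++i)
        for (size_t j = 0; j != TFs[i]->Ops.size(); ++j) {
          Node *Op = TFs[i]->Ops[j];
          if (Op->IsEntry) continue;                       // redundant
          if (Op->IsTokenFactor) { TFs.push_back(Op); continue; }
          if (Seen.insert(Op).second) Ops.push_back(Op);   // dedupe
        }
      return Ops;
    }

    int main() {
      Node Entry = {false, true}, A = {false, false}, B = {false, false};
      Node Inner = {true, false}, Outer = {true, false};
      Inner.Ops.push_back(&A); Inner.Ops.push_back(&Entry);
      Outer.Ops.push_back(&Inner); Outer.Ops.push_back(&B); Outer.Ops.push_back(&A);
      std::vector<Node*> Ops = flatten(&Outer);
      assert(Ops.size() == 2 && Ops[0] == &B && Ops[1] == &A);
      return 0;
    }
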
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ } while (!N->use_empty());
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
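
combineShlAddConstant above uses the wrap-around identity ((x + c1) << c2) == ((x << c2) + (c1 << c2)), which lets the inner constant be shifted and then folded. A one-assertion standalone check with arbitrary values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u;
      const uint32_t c1 = 7, c2 = 5;
      // Distribute the shift over the add; c1 << c2 then folds to a constant.
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
      return 0;
    }
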
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
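+ // With no bit position set in both operands, no carries can be generated,
+ // so the add and the or compute the same value,
+ // e.g. 0xF0 + 0x0F == 0xF0 | 0x0F == 0xFF.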
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
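+ // This works because the AND operand is known to be all-sign-bits, i.e. 0
+ // or -1, and for such a value v we have (v & 1) == -v, so z + (v & 1) is
+ // the same as z - v.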
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
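+ // (sext i1 b) is 0 or -1 while (zext i1 b) is 0 or 1, so for any boolean b
+ // X + (sext i1 b) == X - (zext i1 b).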
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Glue));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // If both operands are null we know that carry out will always be false.
+ if (N0C && N0C->isNullValue() && N0 == N1)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(),
+ MVT::Glue));
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ return SDValue();
+}
+
+ // Since it may not be valid to emit a fold to zero for vector initializers,
+ // check whether we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSExtValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSExtValue()) ||
+ isPowerOf2_64(-N1C->getSExtValue()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ int64_t pow2 = N1C->getSExtValue();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
+
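+ // Illustrative i32 example with N0 = -7 and pow2 = 4 (so lg2 = 2):
+ //   SGN = -7 >>s 31 = -1 (all ones)
+ //   SRL = SGN >>u 30 = 3 (abs2 - 1 when N0 is negative, 0 otherwise)
+ //   ADD = -7 + 3 = -4
+ //   SRA = -4 >>s 2 = -1, which matches -7 / 4 with truncation toward zero.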
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy(SGN.getValueType())));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy(N0.getValueType())));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
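+ // e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2)), since 4 << y == 1 << (y+2).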
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
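+ // e.g. (urem x, 8) -> (and x, 7), since the remainder modulo a power of two
+ // is just the low bits.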
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy(N0.getValueType())));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
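+ // e.g. an i16 mulhs where i32 MUL is legal becomes
+ //   (trunc (srl (mul (sext x), (sext y)), 16)).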
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+ /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+ /// compute two values. LoOp and HiOp give the opcodes for the two computations
+ /// that are being performed. Return the simplified value, or a null SDValue
+ /// if no simplification was made.
+ ///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return the node as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the smul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the umul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+ /// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap. i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
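+ /// For example, for x = 0xAABBCCDD the pattern above produces 0xBBAADDCC,
+ /// and (rotl (bswap x), 16) = (rotl 0xDDCCBBAA, 16) = 0xBBAADDCC as well.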
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // The result of the bswap should be rotated by 16. If that's not legal,
+ // do (x << 16) | (x >> 16) instead.
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
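+ // e.g. on i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8), since 8 + 24 == 32.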
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
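+ // An AND on the SHL half cannot affect the low LShVal bits of the rotate
+ // (those come from the SRL half), and an AND on the SRL half cannot affect
+ // the high RShVal bits, so OR those bit positions into each mask before
+ // combining them.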
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
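+ // For example, (shl (add x, c1), c2) -> (add (shl x, c2), (shl c1, c2)), which
+ // exposes the scaled constant to address-mode matching.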
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The highBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
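+ // e.g. for i32 zero-extended to i64: (shl (zext (shl x, 4)), 40)
+ // -> (shl (zext x), 44); since 40 >= 64 - 32, every bit the inner i32 shift
+ // would have dropped is also shifted out of the 64-bit result.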
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
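+ // e.g. on i32: (shl (srl x, 8), 4) -> (and (srl x, 4), 0x0FFFFFF0).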
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ if (N1C) {
+ SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSHL.getNode())
+ return NewSHL;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for the (size(x) - c1)-bit type,
+ // if the target supports sext_inreg on it.
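+ // e.g. on i32: (sra (shl x, 24), 24) -> (sext_inreg x, i8).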
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, the sext(trunc) form is likely to
+ // result in better code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the type truncated to is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
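+ // e.g. with x : i64: (sra (trunc (srl x, 32)), 5) -> (trunc (sra x, 37)).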
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
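+ // e.g. on i32: (srl (shl x, 8), 8) -> (and x, 0x00FFFFFF).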
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> (xor x, 1) iff at most the low bit of x can be set.
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
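+ // e.g. if only bit 3 of x can possibly be set (i32), the pair becomes
+ // (xor (srl x, 3), 1).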
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL is optimized into an AND, the
+ // SRL itself may not be optimized further. Look for it and add the BRCOND to
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal for all value types.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
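+ // Each recorded SETCC compared the (now truncated) narrow value against a
+ // constant; rewrite it to use ExtLoad directly, extending the constant
+ // operand to match, so the narrow value is no longer needed.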
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already the result we want.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # of elements of the result is the same as the
+ // # of elements of the compare (and the # of elements of the compare
+ // result, for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
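+ // e.g. for x : i32: (zext (trunc x to i16) to i32) -> (and x, 0xFFFF).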
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // We know that the # of elements of the result is the same as the
+ // # of elements of the compare (and the # of elements of the compare
+ // result, for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the extended result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
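+/// For example, if Mask covers only the low 8 bits, then (or X, (shl Y, 8))
+/// simplifies to X, because the shifted operand contributes no demanded bits.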
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+    // If the LHS or RHS doesn't contribute bits to the result, drop it.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, and N is a multiple of the
+/// number of bits of the narrower type, transform it to a narrower load
+/// from address + N / (number of bits of the new type). If the result is to
+/// be extended, also fold the extension to form an extending load.
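+/// For example, on a little-endian target,
+/// (i32 (truncate (srl (i64 (load P)), 32))) can be turned into
+/// (i32 (load P+4)).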
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extended to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+  // If we haven't found a load, we can't narrow it. Don't transform one with
+  // multiple uses, since that would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
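+  // (Bit EVTBits-1 is the sign bit of the narrow value; if it is known zero,
+  // sign- and zero-extension agree, and the zero-extend-in-reg form is just
+  // an AND with a low-bit mask.)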
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" // -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
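+/// For example, (build_pair (i32 load [P]), (i32 load [P+4])) can become a
+/// single i64 load from P, provided the wider load's required alignment is
+/// not stricter than the original and (after legalization) i64 loads are
+/// legal.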
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+ // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
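+  // For example, for f32 the sign-bit mask is 0x80000000, so fneg becomes an
+  // integer XOR with that constant and fabs an AND with its complement.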
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
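+/// For example, bitcasting a constant v2i16 build_vector to v4i8 splits each
+/// i16 element into two i8 constants, while the reverse direction packs
+/// adjacent i8 pairs into i16 constants; pieces are ordered according to the
+/// target's endianness, and FP elements are round-tripped through integers.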
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+  // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing types.
+  // Handle the growing case first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+  // fold (fadd c1, c2) -> c1+c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if (isNegatibleForFree(N1, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (isNegatibleForFree(N0, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
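+    // (A value whose sign bit is zero denotes the same number whether it is
+    //  read as signed or unsigned, so both conversions yield the same FP
+    //  result.)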
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && isTypeLegal(EVT)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (isNegatibleForFree(N0, LegalOperations))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BITCAST &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ VT, Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
+ // Replace the uses of SRL with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
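+  // (For i1 operands, (xor x, y) is true exactly when x != y, so the branch
+  //  can test a setcc instead; the extra xor with 1 negates the condition,
+  //  giving the x == y form.)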
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode() && Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
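+/// For example, a load whose address is (add Base, Offs), where the add also
+/// has other users, can become a pre-incremented load that produces both the
+/// loaded value and the updated pointer; the add's other users are then
+/// rewritten to use that pointer result.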
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+  // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of the add / subtract's uses are redirected
+/// to the new load / store.
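+/// For example, a load from Base followed elsewhere by (add Base, Offs) can
+/// become a post-incremented load that yields both the loaded value and
+/// Base + Offs.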
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+ (UseUse->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), Align);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+      // Replace the chain to avoid dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->isNonTemporal(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load is having specific bytes cleared out. If so, return the byte size
+/// being masked out and the shift amount.
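+///
+/// For example, for an i32 load, (and (load p), 0xFFFF00FF) clears byte 1,
+/// so this returns {1, 1}: one byte masked out, starting at byte offset 1.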
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+ if (Chain->getOperand(i).getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a continuous run of bits. If so, we have 0*1+0*
+ if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+  // Verify that the masked bytes start at a byte offset that is a multiple of
+  // the access width, so the access stays aligned the same as its width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo. If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return 0;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift)
+ IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8,
+ DC->getShiftAmountTy(IVal.getValueType())));
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getTargetLoweringInfo().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
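+///
+/// For example, on a target where i8 operations are legal and narrowing is
+/// considered profitable, the sequence
+///   x = load i32, p;  x = or x, 0x80;  store x, p
+/// may be rewritten as an i8 load / or / store of just the low byte.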
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+  // load + modify + store sequence with a single (narrower) store, which makes
+ // the load dead.
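+  // For example, on a little-endian target,
+  //   store (or (and (load i32 p), 0xFFFF00FF), X), p
+  // where X is known to be zero outside of byte 1 can be rewritten as a
+  // single i8 store of (trunc (srl X, 8)) at p+1.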
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow it the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+    // If the lowest changed bit does not start at a boundary of the narrower
+    // type's bit width, start at the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff),
+ LD->isVolatile(), LD->isNonTemporal(),
+ NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff),
+ false, false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
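+///
+/// For example, an f64 value that is loaded and immediately stored elsewhere
+/// (with no other uses) may be moved through the integer unit as an i64
+/// load / store on targets that report this as profitable, avoiding an FP
+/// register round trip.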
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ EVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
+ case MVT::f80: // We don't do this for these yet.
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(), Align);
+ }
+ }
+
+  // Try transforming a pair of floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // BUILD_VECTOR with undef elements and the inserted element.
+ if (InVec.getOpcode() == ISD::UNDEF &&
+ isa<ConstantSDNode>(EltNo)) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted element.
+ SDValue InOp = InVec.getOperand(0);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ SDValue EltNo = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(EltNo)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ EVT VT = InVec.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+      // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BITCAST)
+ InVec = InVec.getOperand(0);
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ }
+ }
+
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
+ if (Elt) {
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
+ EVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+    // We can't make a shuffle if the result type is not legal.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+
+ assert(N0.getValueType().getVectorNumElements() == NumElts &&
+ "Vector shuffle must be normalized in DAG");
+
+ // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+      if ((LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+          (RLD->hasAnyUseOfValue(1) &&
+           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+  // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false,
+ false, Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> "(and (srl X, C2), A)" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV,
+ getShiftAmountTy(N0.getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A)
+  //   -> (and (sra (shl x, C1), size(x)-1), A)
+  // where y has a single bit set.
+  // In plain terms, we can turn the SELECT_CC into an AND when the condition
+  // can be materialized as an all-ones register.  Any single bit-test can be
+  // materialized as an all-ones register with a shift-left and a
+  // shift-right-arithmetic.
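+  // For example, for i32 x, (select_cc seteq (and x, 4), 0, 0, A) becomes
+  // (and (sra (shl x, 29), 31), A): the shl moves bit 2 into the sign bit and
+  // the sra smears it across the register, giving all-ones exactly when the
+  // bit was set.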
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1,
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+
+ // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+ // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ EVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy(Ctlz.getValueType())));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(XType)));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
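+  // For example, for i32 and X = -7: Y = sra(X, 31) = -1, add(X, Y) = -8, and
+  // xor(-8, -1) = 7. For X >= 0, Y is 0 and the add and xor are no-ops.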
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+ EVT XType = N0.getValueType();
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy(N0.getValueType())));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
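+/// For example, a signed i32 divide by 7 is typically selected as a MULHS by
+/// the magic constant 0x92492493 plus a few fix-up adds and shifts (shown for
+/// illustration; the exact constants depend on the divisor and bit width).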
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
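+/// For example, an unsigned i32 divide by 7 is typically selected as a MULHU
+/// by 0x24924925 followed by a fix-up add and shifts (illustration only).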
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ const GlobalValue *&GV, void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant, then integrate the offset.
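+  // For example, an (add X, 8) pointer yields Base = X and Offset = 8.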
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+ : (void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+  // If the base is a frame index then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ const GlobalValue *GV1, *GV2;
+ void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address, then check to see if they overlap.
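+  // The accesses [Offset, Offset+Size) overlap unless one ends at or before
+  // the other begins.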
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+  // If SrcValue1 and SrcValue2 are known to have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
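+  // For example, two 4-byte accesses at offsets 0 and 4 of 8-byte-aligned
+  // values cannot overlap: their offsets modulo the alignment differ by at
+  // least the access size.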
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+    // size; return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
+ return true;
+ }
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ SrcValueAlign = ST->getOriginalAlignment();
+ TBAAInfo = ST->getTBAAInfo();
+ return false;
+ }
+ llvm_unreachable("FindAliasInfo expected a memory operand");
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+    // find more and revert to the original chain since the xform is unlikely
+    // to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+    // chain we found before we hit a TokenFactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
+
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+      // The entry token is the ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
+
+      // If the chain is an alias, then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+  // If there are no operands, then chain to the entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+  // If there is a single operand, then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 000000000000..54a7d43f46d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1373 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+  // Start out as null, meaning no local-value instructions have
+ // been emitted.
+ LastLocalValue = 0;
+
+ // Advance the last local value past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ LastLocalValue = I;
+ ++I;
+ }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
+ }
+ }
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
+ recomputeInsertPt();
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ const Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ const Function *F = Call->getCalledFunction();
+ if (!F) return false;
+
+ // Handle selected intrinsic function calls.
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ if (!DIVariable(DI->getVariable()).Verify() ||
+ !FuncInfo.MF->getMMI().hasDebugInfo())
+ return true;
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ return true;
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ if (Arg->hasByValAttr()) {
+        // A byval argument's frame index is recorded during argument lowering.
+ // Use this info directly.
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+ break;
+
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ case Intrinsic::eh_selector: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+ break;
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(Call);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+
+ bool ResultRegIsKill = hasTrivialKill(Call);
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg, ResultRegIsKill);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ UpdateValueMap(Call, ResultReg);
+
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ }
+
+ // An arbitrary call. Bail.
+ return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode())) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (TargetSelectInstruction(I)) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ DL = DebugLoc();
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+ if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
+ }
+ FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg, OpRegIsKill);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
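+  // For f64 this XORs bit 63 (the IEEE-754 sign bit) of the integer image; for
+  // f32 it XORs bit 31.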
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ const Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+    // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ TRI(*TM.getRegisterInfo()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+  // Horrible hack (to be removed): check to make sure shift amounts are
+  // in range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ }
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+ // Ignore dead PHI nodes.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
+ }
+ }
+
+ return true;
+}
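A minimal illustrative sketch of how a target FastISel subclass might call the
FastEmitInst_* helpers above; MyTarget, MOVi, and GPRRegisterClass are
placeholder names rather than real target definitions.

// Materialize an immediate: emits "ResultReg = MOVi Imm" at FuncInfo.InsertPt
// via FastEmitInst_i and returns the new virtual register.
unsigned MyTargetFastISel::materializeImm(uint64_t Imm) {
  return FastEmitInst_i(MyTarget::MOVi,              // placeholder opcode
                        MyTarget::GPRRegisterClass,  // placeholder reg class
                        Imm);
}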
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 000000000000..d518b5d346a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,456 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+ }
+ return false;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(),
+ Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
+ Outs, Fn->getContext());
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here if this object is a suitable
+ // candidate, i.e., one that would trigger the creation of a stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize > 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN->getDebugLoc();
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ // Mark landing pad blocks.
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return NULL;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return NULL;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ const Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
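// For illustration: given an i8 PHI whose incoming values are the constants
// 2 (0b00000010) and 6 (0b00000110), the loop above converges to
//   KnownOne    = 0b00000010   // bit 1 is set in every incoming value
//   KnownZero   = 0b11111001   // bits 0 and 3-7 are clear in every value
//   NumSignBits = min(6, 5) = 5
// so bit 2 is the only bit left unknown for the PHI's destination register.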
+
+/// setByValArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+ int FI) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getByValArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ return 0;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<const GlobalVariable *> TyInfo;
+ unsigned N = I.getNumArgOperands();
+
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
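// For illustration: the operand layout decoded above is
// (exception, personality, clauses...), where a ConstantInt K begins a filter
// holding the next K-1 type infos and K == 0 marks a cleanup. For a
// hypothetical selector call (the type-info names are placeholders)
//   llvm.eh.selector(%exn, @personality,
//                    @typeid.A,          ; catch A
//                    i32 2, @typeid.B,   ; filter of one type info (2 == 1+1)
//                    i32 0)              ; cleanup
// the right-to-left scan records the cleanup, then the one-entry filter, and
// the final "if (N > 2)" block adds @typeid.A as a catch.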
+
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ SmallPtrSet<const BasicBlock*, 4> Visited;
+
+ // The 'eh.selector' call may not be in the direct successor of a basic block,
+ // but could be several successors deeper. If we don't find it, try going one
+ // level further. <rdar://problem/8824861>
+ while (Visited.insert(SuccBB)) {
+ for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+ I != E; ++I)
+ if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to LPad.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SuccBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ return;
+ }
+
+ const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+ if (Br && Br->isUnconditional())
+ SuccBB = Br->getSuccessor(0);
+ else
+ break;
+ }
+}
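A rough usage sketch of the set()/clear() lifecycle above, as a hypothetical
driver might invoke it; Fn, MF, SomeInst, and SomeBB stand in for whatever the
caller already has in hand.

FunctionLoweringInfo FuncInfo(TLI);  // TLI: the target's TargetLowering
FuncInfo.set(Fn, MF);                // builds MBBMap, ValueMap, static allocas

// During lowering, cross-block values and blocks are looked up by table:
unsigned VReg = FuncInfo.ValueMap[SomeInst];       // vreg for an exported value
MachineBasicBlock *MBB = FuncInfo.MBBMap[SomeBB];  // MBB for an IR block

// ... instruction selection for each block runs here ...

FuncInfo.clear();                    // reset all per-function state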
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..f0f4743298e7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,900 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
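// For illustration: a machine node whose result list is (i32, f64, Other, Glue)
// has CountResults == 2; the trailing glue and chain results are peeled off the
// end. CountOperands trims the operand list the same way, so only the node's
// real inputs are counted.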
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ EVT VT = Node->getValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ EVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = 0;
+ if (i+II.getNumDefs() < II.getNumOperands())
+ RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of
+/// node is a CopyToReg, return its destination register. Return 0 otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
+ if (II) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *DstRC = 0;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MI->getNumOperands();
+ while (Idx > 0 &&
+ MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
+ false/*isImp*/, isKill,
+ false/*isDead*/, false/*isUndef*/,
+ false/*isEarlyClobber*/,
+ 0/*SubReg*/, IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+ JT->getTargetFlags()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM->getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+ CP->getTargetFlags()));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+ ES->getTargetFlags()));
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+ BA->getTargetFlags()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, EVT VT) {
+ // Pick the register class of the superregister for this type.
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Figure out the register class to create for the destreg.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+
+ // Add source, and subreg index
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
+ MBB->insert(InsertPos, MI);
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ MBB->insert(InsertPos, MI);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MI);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ uint64_t Offset = SD->getOffset();
+ MDNode* MDPtr = SD->getMDPtr();
+ DebugLoc DL = SD->getDebugLoc();
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ unsigned FrameIx = SD->getFrameIx();
+ return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ MIB.addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // The MachineInstr constructor adds implicit-def operands. Scan through
+ // these to determine which are dead.
+ if (MI->getNumOperands() != 0 &&
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ // First, collect all used registers.
+ SmallVector<unsigned, 8> UsedRegs;
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
+ if (F->getOpcode() == ISD::CopyFromReg)
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ else {
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ // Then mark unused registers as dead.
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ }
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MI);
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ // If there are no uses, mark the register as dead now, so that
+ // MachineLICM/Sink can see that it's dead. Don't do this if the
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
+ MI->addRegisterDead(Reg, TRI);
+ }
+ }
+
+ // If the instruction has implicit defs and the node doesn't, mark the
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
+ if (const unsigned *IDList = II.getImplicitDefs()) {
+ for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
+ i != e; ++i)
+ MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ }
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
+ /*isKill=*/ false,
+ /*isDead=*/ false,
+ /*isUndef=*/false,
+ /*isEarlyClobber=*/ true));
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+ break;
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MI->addOperand(MachineOperand::CreateMetadata(MD));
+
+ MBB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()),
+ MRI(&MF->getRegInfo()),
+ TM(&MF->getTarget()),
+ TII(TM->getInstrInfo()),
+ TRI(TM->getRegisterInfo()),
+ TLI(TM->getTargetLowering()),
+ MBB(mbb), InsertPos(insertpos) {
+}
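A simplified sketch of how a scheduler's emit loop might drive this class;
Sequence stands in for whatever scheduled node order the caller produced.

InstrEmitter Emitter(MBB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;  // maps each emitted SDValue to its vreg
for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
  Emitter.EmitNode(Sequence[i], /*IsClone=*/false, /*IsCloned=*/false,
                   VRBaseMap);
InsertPos = Emitter.getInsertPos();     // continue emitting after the new code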
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..19fc0445b166
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,142 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class MCInstrDesc;
+class SDDbgValue;
+
+class InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of
+ /// node is a CopyToReg, return its destination register. Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional glue operands
+ /// (which do not go into the resulting MachineInstr).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The inputs to target nodes have any actual inputs first,
+ /// followed by an optional chain operand, then an optional glue operand. Compute
+ /// the number of actual operands that will go into the resulting
+ /// MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 000000000000..d06e2bdce065
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3893 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support a 'brcc' instruction, this
+/// will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ - This stack keeps track of the CALLSEQ_END nodes that have
+ /// been legalized. We use it to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SmallVector<SDValue, 8> LastCALLSEQ;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what legalized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return it unchanged.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
+ }
+
+public:
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - Return a legal replacement for the given operation, with
+ /// all legal operands.
+ SDValue LegalizeOp(SDValue O);
+
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
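+ // Accessors for the LastCALLSEQ stack used to track nested call sequences.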
+ SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
+ void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+ void pushLastCALLSEQ(SDValue s) {
+ LastCALLSEQ.push_back(s);
+ }
+ void popLastCALLSEQ() {
+ LastCALLSEQ.pop_back();
+ }
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
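+ // Expand each mask element into NumEltsGrowth consecutive indices of the
+ // narrower element type; undef (-1) entries stay undef.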
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+ : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag) {
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
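+ // Seed the call-sequence stack with the entry token so getLastCALLSEQ()
+ // always has a value to return.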
+ pushLastCALLSEQ(DAG.getEntryNode());
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
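+ // 'depth' tracks how deeply we are nested in CALLSEQ_START/CALLSEQ_END
+ // pairs; the CALLSEQ_END reached at depth 1 is the one that terminates the
+ // original call sequence.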
+ int next_depth = depth;
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ next_depth = depth + 1;
+ if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ assert(depth > 0 && "negative depth!");
+ if (depth == 1)
+ return Node;
+ else
+ next_depth = depth - 1;
+ }
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // If we still couldn't find a chain result, this node has no chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
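+ // 'nested' counts CALLSEQ_END nodes seen while walking up the chain; each
+ // matching CALLSEQ_START cancels one out, so we stop at the CALLSEQ_START
+ // that begins this call sequence.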
+ int nested = 0;
+ assert(Node && "Didn't find callseq_start for a call??");
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ Node = Node->getOperand(0).getNode();
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest. If no operands lead to Dest, legalize
+/// them and ourself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot reach Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+ NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If an FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
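+ // Walk down through progressively narrower FP types, keeping the narrowest
+ // one that still represents the constant exactly and that the target is
+ // willing and able to extend-load from.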
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from the
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false,
+ Alignment);
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
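+ // For example, an unaligned i32 store becomes two i16 truncating stores:
+ // the low half (Val) at Ptr and the high half (Val >> 16) at Ptr + 2, with
+ // the two halves swapped on big-endian targets.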
+ // Divide the stored value into two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getPointerInfo(), NewStoredVT,
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
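+ // For example, an unaligned i32 load becomes a zero-extending i16 load of
+ // the low half and an i16 load of the high half, combined below as
+ // (Hi << 16) | Lo; the halves' addresses are swapped on big-endian targets.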
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the idx is a constant and the permute is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
+ false, false, 0);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value can be wider than the
+ // element type.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3;
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
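+ // For example, a store of float 1.0 becomes a store of the i32 bit
+ // pattern 0x3F800000.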
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use two 32-bit
+ // stores. If the target supports neither 32- nor 64-bit registers, this
+ // transform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(0, 0);
+}
+
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return Op;
+
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Result = Op;
+ bool isCustom = false;
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
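+ // Most opcodes take the SimpleFinishLegalizing path below; LOAD, STORE,
+ // CALLSEQ_START/CALLSEQ_END and BUILD_VECTOR clear the flag and are handled
+ // by the dedicated code further down.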
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::VAARG:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::BUILD_VECTOR:
+ // A weird case: legalization for BUILD_VECTOR never legalizes the
+ // operands!
+ // FIXME: This really sucks... changing it wouldn't be semantically
+ // incorrect, but it massively pessimizes the code for floating-point
+ // BUILD_VECTORs because ConstantFP operands get legalized into constant
+ // pool loads before the BUILD_VECTOR code can see them. It doesn't usually
+ // bite, though, because BUILD_VECTORs usually get lowered into other nodes
+ // which get legalized properly.
+ SimpleFinishLegalizing = false;
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SmallVector<SDValue, 8> Ops, ResultVals;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::BR:
+ case ISD::BRIND:
+ case ISD::BR_JT:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ // Branches tweak the chain to include LastCALLSEQ
+ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+ getLastCALLSEQ());
+ Ops[0] = LegalizeOp(Ops[0]);
+ setLastCALLSEQ(DAG.getEntryNode());
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[1].getValueType().isVector())
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[1]));
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[2].getValueType().isVector())
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[2]));
+ break;
+ }
+
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
+ Ops.size()), 0);
+ switch (Action) {
+ case TargetLowering::Legal:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Result.getValue(i));
+ break;
+ case TargetLowering::Custom:
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Tmp1);
+ else
+ ResultVals.push_back(Tmp1.getValue(i));
+ }
+ break;
+ }
+
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Result.getNode(), ResultVals);
+ break;
+ case TargetLowering::Promote:
+ PromoteNode(Result.getNode(), ResultVals);
+ break;
+ }
+ if (!ResultVals.empty()) {
+ for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+ if (ResultVals[i] != SDValue(Node, i))
+ ResultVals[i] = LegalizeOp(ResultVals[i]);
+ AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+ }
+ return ResultVals[Op.getResNo()];
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump( &DAG);
+ dbgs() << "\n";
+#endif
+ assert(0 && "Do not know how to legalize this operator!");
+
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+ assert(CallEnd && "didn't find CALLSEQ_END!");
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be
+ // inserted are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we have legalized all of the inputs (which may have inserted
+ // libcalls), create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, getLastCALLSEQ());
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
+ Ops.size()), Result.getResNo());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ // Note that we are selecting this call!
+ setLastCALLSEQ(SDValue(CallEnd, 0));
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(getLastCALLSEQ());
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ {
+ SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+ // This will cause this node to be legalized as well and handle libcalls
+ // correctly.
+ if (getLastCALLSEQ().getNode() != Node) {
+ LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
+ }
+ }
+ // This finishes up call legalization.
+ popLastCALLSEQ();
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.getResNo());
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp3 = Result.getOperand(0);
+ Tmp4 = Result.getOperand(1);
+ Tmp3 = LegalizeOp(Tmp3);
+ Tmp4 = LegalizeOp(Tmp4);
+ }
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.getNode()) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+ return Op.getResNo() ? Tmp4 : Tmp3;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ if (TLI.isBigEndian()) {
+ // MSB (which is garbage, comes first)
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+ // LSB (which is data, comes first)
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+
+ }
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ Result = SDValue(OptStore, 0);
+ break;
+ }
+
+ {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
+
+ EVT VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ break;
+ }
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(ST->getValue());
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+ DAG, TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ // The store type is illegal; scalarize the vector store.
+ SmallVector<SDValue, 8> Stores;
+ bool ScalarLegal = TLI.isTypeLegal(WideScalarVT);
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+ // Types smaller than 8 bits are promoted to 8 bits.
+ ScalarSize = std::max<unsigned>(ScalarSize, 8);
+ // Store stride
+ unsigned Stride = ScalarSize/8;
+ assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
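+ // Emit one scalar store per element: extract the element, truncate it to
+ // the (possibly widened) scalar type, and store it at offset Idx*Stride.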
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+ Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ Stores.push_back(Store);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ // The store type is illegal and so is its scalar type; bitcast the
+ // value to a vector with narrower elements and store it in smaller
+ // parts.
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
+ unsigned WideNumElem = StVT.getVectorNumElements();
+ unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+ SizeRatio*WideNumElem);
+
+ // Cast the wide elem vector to wider vec with smaller elem type.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
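+ // Walk the narrow pieces of the bitcast value; only the piece that
+ // carries the original element's data (which piece that is depends on
+ // endianness) is actually stored.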
+ for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+ // Extract element Idx.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+ // bump pointer.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ // Store this element if it is:
+ // - First element on big endian, or
+ // - Last element on little endian
+ if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ if (Op.getValueType().isVector())
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
+ false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the whole vector to a temporary stack slot, store the inserted
+ // part into it, then load the updated vector back out.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector, chained after the store of the whole vector so
+ // that both stores are ordered before the final load.
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ EltVT, false, false, 0));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+
+ // Get the sign bit of the RHS. First obtain a value that has the same
+ // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
+ SDValue SignBit;
+ EVT FloatVT = Tmp2.getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+ if (TLI.isTypeLegal(IVT)) {
+ // Convert to an integer with the same sign bit.
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ } else {
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getPointerTy();
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ // Then store the float to it.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ if (TLI.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ } else { // Little endian
+ SDValue LoadPtr = StackPtr;
+ // The float may be wider than the integer we are going to load. Advance
+ // the pointer so that the loaded integer will contain the sign bit.
+ unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+ unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+ LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ // Load a legal integer containing the sign bit.
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Move the sign bit to the top bit of the loaded integer.
+ unsigned BitShift = LoadTy.getSizeInBits() -
+ (FloatVT.getSizeInBits() - 8 * ByteOffset);
+ assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+ if (BitShift)
+ SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+ }
+ // Now get the sign bit proper, by seeing whether the value is negative.
+ SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
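+ // If the requested alignment is larger than the stack alignment, round
+ // the stack pointer down to the requested alignment first.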
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ EVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: assert(0 && "Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
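+ // An unsupported FP comparison is split into two SETCC nodes: CC1 tests
+ // the ordered relation and CC2 tests for (un)orderedness; Opc (AND or OR)
+ // combines the two results.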
+ switch (CCCode) {
+ default: assert(0 && "Don't know how to expand this condition!");
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
+
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ EVT SlotVT,
+ EVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // wider than the slot type.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, SlotVT, false, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ PtrInfo, false, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ Node->getValueType(0).getVectorElementType(),
+ false, false, 0);
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(SPFI),
+ false, false, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
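+ // Scan the operands once to find out whether only element 0 is defined,
+ // whether every defined element is a constant, and whether more than two
+ // distinct values appear.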
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment);
+ }
+
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
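+ // Marshal the node's operands into the argument list, marking each one as
+ // sign- or zero-extended according to isSigned.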
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check whether the call is in tail position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
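+ // Look through the other users of the first operand for the complementary
+ // operation on the same operands; if one exists, a combined divrem libcall
+ // pays off.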
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if (User->getOpcode() == OtherOpcode &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the stack slot that receives the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ DebugLoc dl = Node->getDebugLoc();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+ MachinePointerInfo(), false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ EVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, MachinePointerInfo(),
+ false, false, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
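+ // LoOr is the bit pattern of the double 2^52 + lo32 and HiOr the pattern
+ // of 2^84 + hi32*2^32; subtracting 2^84 + 2^52 from HiFlt and adding
+ // LoFlt reconstructs hi32*2^32 + lo32 with a single, correctly rounded
+ // FADD.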
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
+ // Implementation of unsigned i64 to f32.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
+
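+ // For inputs of 2^53 or more, fold any set low 11 bits into bit 11
+ // (round-to-odd) so the two exact i32->f64 conversions, the recombination
+ // as hi*2^32 + lo, and the final round to f32 cannot double-round.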
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+ }
+
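+ // Generic fallback: convert as signed, then add a power-of-two fudge
+ // factor, selected from the constant pool by the input's sign bit, to
+ // compensate when the value was interpreted as negative.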
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment);
+ else {
+ FudgeInReg =
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ EVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+ /// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
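+ // Shift each byte into its mirrored position, mask the two middle bytes,
+ // and OR the four pieces back together.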
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
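+ // Keep doubling the replicated pattern until it covers all NumBits bits.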
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: assert(0 && "Cannot expand this yet!");
+ case ISD::CTPOP: {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
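+ // Map the atomic opcode and its memory width onto the matching __sync_*
+ // libcall.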
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::MEMBARRIER: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, VT));
+ }
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
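+ // Sign-extend in register with a shift pair: shift the value left so the
+ // ExtraVT sign bit becomes the MSB, then arithmetic-shift right by the same
+ // amount (e.g. i8-in-i32: shl by 24 followed by sra by 24).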
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
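+ // Expand fp-to-unsigned as a select of two fp-to-signed conversions, where
+ // N is the width of the integer result: if X < 2^(N-1), FP_TO_SINT(X) is
+ // used directly; otherwise X - 2^(N-1) is converted and the sign bit of the
+ // result is set back with an XOR.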
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
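+ // Round the va_list pointer up to the requested alignment:
+ // VAList = (VAList + Align - 1) & ~(Align - 1).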
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ TLI.getPointerTy()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getTargetData()->
+ getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ if (!TLI.isTypeLegal(EltVT))
+ EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(0),
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(1),
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ EVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FP16_TO_FP32:
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ case ISD::FP32_TO_FP16:
+ Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal for the target.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(SDValue(Node, 0));
+ else
+ Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
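+ // Prefer a combined [SU]DIVREM node, then a plain [SU]DIV plus mul/sub,
+ // and fall back to a runtime library call otherwise.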
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
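+ // MULH[SU] wants the high half of the product, which is result #1 of the
+ // two-result [SU]MUL_LOHI node.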
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if the multiply can be lowered using a two-result operation.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
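+ // For example, on i8 the signed addition 100 + 100 wraps to -56: both
+ // operands are non-negative but the sum is negative, so overflow is set.
+ //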
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
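+ // Unsigned overflow is just wraparound: for ADD the sum is less than LHS,
+ // for SUB the difference is greater than LHS.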
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ // The high part of each sign-extended operand is obtained by SRA'ing all
+ // but one of the bits of the low part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture, so each wide argument has to
+ // be split into two.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
+ }
+
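+ // Overflow occurred iff the high half is not what a plain multiply would
+ // leave there: the sign-extension of the low half for signed multiplies,
+ // or zero for unsigned ones.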
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT PairTy = Node->getValueType(0);
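+ // Pair = (anyext(Hi) << (PairBits / 2)) | zext(Lo).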
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy(PairTy)));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
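+ // Lower SELECT into SELECT_CC: reuse the comparison operands if the
+ // condition is itself a SETCC, otherwise compare the condition against
+ // zero with SETNE.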
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ EVT PTy = TLI.getPointerTy();
+
+ const TargetData &TD = *TLI.getTargetData();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
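+ // Compute the address of the selected entry, Table + Index * EntrySize,
+ // then load the entry and sign-extend it to pointer width.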
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
+ false, false, 0);
+ Addr = LD;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ setLastCALLSEQ(DAG.getEntryNode());
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(SDValue(Node, i));
+ break;
+ }
+}
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT OVT = Node->getValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC) {
+ OVT = Node->getOperand(0).getValueType();
+ }
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
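+ // CTPOP is unaffected by zero extension, but CTTZ and CTLZ are adjusted
+ // below: CTTZ of a zero input must report the original bit width, and
+ // CTLZ must not count the extra high zero bits.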
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // If Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT).
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
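+ // After byte-swapping in the wider type, the original bytes end up in the
+ // high end of the result, so shift them back down by the size difference.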
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+ // Construct a SelectionDAGLegalize and run it over the entire DAG.
+ SelectionDAGLegalize(*this).LegalizeDAG();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..e6835d87f82c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1456 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy(RVT)));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Move the sign bit into the first operand's sign-bit position if the two
+ // operands have different widths.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy(LVT)));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT,
+ L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
+ L->getMemoryVT(), L->isVolatile(),
+ L->isNonTemporal(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+ // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+ // Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl) {
+ SDValue LHSInt = GetSoftenedFloat(NewLHS);
+ SDValue RHSInt = GetSoftenedFloat(NewRHS);
+ EVT VT = NewLHS.getValueType();
+
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
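+ // Predicates without a direct libcall (the unordered ones and SETONE) are
+ // built from two libcalls whose comparison results are OR'd together below.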
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(false && "Do not know how to soften this setcc!");
+ }
+ }
+
+  // Use the target-specific return type for comparison libcalls.
+ EVT RetVT = TLI.getCmpLibcallReturnType();
+ SDValue Ops[2] = { LHSInt, RHSInt };
+ NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = TLI.getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
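+
+// Worked example: an f32 SETUEQ (unordered or equal) has no single libcall,
+// so it is split into LC1 = UO_F32 and LC2 = OEQ_F32.  With the default
+// libcall configuration this computes roughly
+//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
+// where each comparison against zero comes from TLI.getCmpLibcallCC and the
+// two setcc results are OR'd together above.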
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[1])), NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ const uint64_t *Parts = 0;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
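+
+// A note on the 2^N tables above: each encodes the ppcf128 (double-double)
+// value 2^N, i.e. a high double of 2^N and a low double of 0.  An IEEE-754
+// double equal to 2^N has an all-zero mantissa and a biased exponent of
+// 1023 + N, so 2^32 is 0x41f0000000000000 (exponent 0x41F = 1055), and 2^64
+// and 2^128 use exponents 0x43F and 0x47F in the same way.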
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+ == TargetLowering::Custom)
+ Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ EVT VT = NewLHS.getValueType();
+ assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
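+
+// The combined predicate built above is
+//   (LHSHi == RHSHi && LHSLo CC RHSLo) || (LHSHi != RHSHi && LHSHi CC RHSHi)
+// i.e. when the high doubles compare (ordered) equal the low doubles decide,
+// otherwise the high doubles alone decide.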
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
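+
+// The constant 0x41e0000000000000 is the double 2^31 (biased exponent
+// 1023 + 31 = 0x41E, zero mantissa).  Inputs below 2^31 convert directly
+// with FP_TO_SINT; inputs in [2^31, 2^32) are first reduced by 2^31 so the
+// signed conversion cannot overflow, and adding 0x80000000 afterwards
+// restores the missing top bit of the unsigned result.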
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getPointerInfo(),
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..e7c77dd10cb6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2887 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getMemOperand());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT))
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ case TargetLowering::TypeWidenVector:
+ if (OutVT.bitsEq(NInVT))
+ // The input is widened to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
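+
+// For example, bswap of an i16 promoted to i32: BSWAP on the promoted value
+// moves the two interesting bytes into the top half of the register, so the
+// result is shifted right by DiffBits (32 - 16 = 16) to return them to the
+// low 16 bits of the promoted result.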
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
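+
+// For example, ctlz of an i8 promoted to i32: the zero-extended operand has
+// 24 extra leading zero bits, so the i32 CTLZ over-counts by exactly
+// NVT bits - OVT bits = 24, which the SUB above removes.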
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
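+
+// For example, cttz of an i8 promoted to i32: OR'ing in bit 8, just above the
+// original type, does not change the trailing-zero count of any non-zero i8,
+// but makes an all-zero i8 produce 8 (the original bit width) rather than 32.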
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ return DAG.getNode(ISD::AssertZext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
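+
+// For example, sext from i8 to i16 where both types promote to i32: the
+// promoted i8 operand already lives in an i32 with unspecified high bits, so
+// a SIGN_EXTEND_INREG from i8 is emitted instead of a fresh extension.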
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->isNonTemporal(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
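+
+// For example, an i8 SADDO promoted to i32: 100 + 100 yields 200 in i32, but
+// sign extending 200 from bit 7 gives -56 != 200, so the overflow flag is
+// set, matching the signed overflow of the original i8 add.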
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+ Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ EVT EltVT = InVT.getScalarType();
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(0));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(NumElts/2));
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply, then check the high bits of
+ // the result to see if the overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result does not
+ // zero/sign-extend the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
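+
+// For example, an i8 UMULO promoted to i32: both inputs are zero extended,
+// so the i32 product is exact; shifting it right by SmallVT's width (8)
+// isolates everything above the low i8 result, and any non-zero bits there
+// mean the original i8 multiply overflowed.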
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP32:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand promotion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (Op#1) and basic block destination (Op#4) are
+ // always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
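+ // Combine the pair: the low operand supplies the low bits, and the high
+ // operand, shifted up by the original operand width, supplies the rest.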
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the operands. Their type does not need to match the vector
+ // element type exactly; check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
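+ // Operands 1-5 are boolean flags; after promotion keep only their low bit.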
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond,
+ N->getOperand(1), N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
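+ // Only the shift amount (operand 1) needs promotion; the shifted value is
+ // already legal. Zero extension is safe because an in-range shift amount
+ // has no bits above the original width.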
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
+ isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
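+ // The promoted value has at least as many bits as the original operand, so
+ // truncating it straight to the result type keeps exactly the bits we need.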
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
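+ // The high bits of the promoted value are not guaranteed to be zero, so
+ // any-extend to the result type and then clear everything above the
+ // original width.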
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion, we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc DL = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
+ } else {
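+ // General case: the high part is the shifted high input OR'd with the bits
+ // that spill over from the low input.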
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
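+ // General case: the low part combines the shifted low input with the bits
+ // that spill down from the high input; the high part is simply shifted.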
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+#if 0
+ // FIXME: This code is broken for shifts with a zero amount!
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Compute 32-amt.
+ SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+ DAG.getConstant(NVTBits, ShTy),
+ Amt);
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(Op1, NVT, InH, Amt),
+ DAG.getNode(Op2, NVT, InL, Amt2));
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
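+ // Precompute both forms of the shift amount: AmtExcess = Amt - NVTBits for
+ // shifts that cross into the other half, AmtLack = NVTBits - Amt for
+ // combining the two halves, and isShort to select between the two cases.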
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+
+ return false;
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
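+ // No carry-setting add is available, so detect the carry out of the low
+ // half by hand: an unsigned add wrapped iff the result is less than one of
+ // its operands.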
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo; make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero; make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ MemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
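+ // Fold in the cross terms LL*RH and LH*RL; the LH*RH term only contributes
+ // above the double-width result, so it is ignored.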
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this _PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+ SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+ EVT VT = LHSL.getValueType();
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
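+ // The low half is a plain truncation of the input; the high half comes from
+ // the bits just above it, shifted down and then truncated.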
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // Check the multiplication by dividing the product back out. Substitute a
+ // divisor of 1 when RHS is zero so the divide cannot trap, and force the
+ // overflow flag to false in that case, since multiplying by zero never
+ // overflows.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, DL, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)), Overflow);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ true, Func, Args, DAG, dl);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
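+
+// Editorial note (not part of the original change): the UMULO fast path above
+// detects overflow by dividing the truncated product back out: if LHS * RHS
+// did not wrap, then (LHS * RHS) / RHS == LHS exactly, otherwise the quotient
+// differs. For example, with 8-bit values 20 * 30 wraps to 88, and
+// 88 / 30 == 2 != 20, so overflow is reported. RHS is replaced by 1 when it
+// is zero so that the UDIV cannot divide by zero.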
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+  // If this is a comparison of the sign bit (X < 0 or X > -1), just look at
+  // the top part.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+  // FIXME: The code generated by this expansion is poor.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, the result is just the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
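+
+// Editorial note (not part of the original change): a worked instance of the
+// lo/hi comparison scheme above, assuming an i64 SETULT expanded into i32
+// halves. Comparing 0x00000001FFFFFFFF u< 0x0000000200000000:
+//   Tmp1 = (lo1 = 0xFFFFFFFF) u< (lo2 = 0x00000000) = false
+//   Tmp2 = (hi1 = 0x00000001) u< (hi2 = 0x00000002) = true
+// The high parts differ, so the select picks Tmp2 and the result is true,
+// as expected.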
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
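+
+// Editorial note (not part of the original change): for example, when an i64
+// shift amount is expanded into two i32 halves, any amount whose upper half is
+// non-zero requests a shift of at least 2^32 bits, which exceeds the width of
+// every legal type and therefore yields an undefined result anyway, so only
+// the lower half of the amount matters.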
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which is awkward on 8/16-bit targets. Just truncate the constant to a
+  // legal type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), isVolatile, isNonTemporal,
+ Alignment);
+ }
+
+ if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
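+
+// Editorial note (not part of the original change): a worked instance of the
+// big-endian path above for an i48 store expanded with NVT = i32. Here
+// EBytes = 6, IncrementSize = 4, ExcessBits = 16 and HiVT = i32. The first,
+// aligned i32 written at Ptr holds bits [47:16] of the value, assembled as
+// (Hi << 16) | (Lo >> 16), and the trailing i16 written at Ptr + 4 holds
+// bits [15:0] taken from the low part.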
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
+ const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ assert(false && "Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
+ false, false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
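+
+// Editorial note (not part of the original change): the fudge constants above
+// are IEEE-754 single-precision bit patterns; 0x4F800000 encodes 2^32 and
+// 0x5F800000 encodes 2^64. If the source, reinterpreted as signed, has its
+// sign bit set, the signed conversion produced x - 2^N for an iN source, so
+// adding back the matching power of two recovers the unsigned value. For
+// example, the i32 value 0xFFFFFFFF converts signed to -1.0, and
+// -1.0 + 2^32 = 4294967295.0, the correct unsigned result.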
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ ConvertedVector, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+  return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+
+ // For each incoming vector
+  for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx != E; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+    for (unsigned i = 0; i < NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+                     &NewOps[0], NewOps.size());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..ba658b022053
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1153 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ }
+
+  // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case TargetLowering::TypeLegal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case TargetLowering::TypeLegal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(0);
+ }
+ }
+#endif
+
+ return Changed;
+}
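+
+// Editorial note (not part of the original change): the NodeId bookkeeping in
+// the loop above uses positive IDs as a countdown of not-yet-processed
+// operands. For example, a two-operand user is first noticed when one of its
+// operands finishes, at which point its ID is set to NumOperands - 1 = 1; when
+// the second operand finishes the ID is decremented to 0 (ReadyToProcess) and
+// the user is pushed onto the worklist.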
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+ if (M != N) {
+      // The node morphed into a different node. Normally for this to happen
+      // the original node would have to be marked NewNode. However this can
+      // momentarily fail to hold while ReplaceValueWith is in progress. Mark
+      // the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
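+
+// Editorial note (not part of the original change): with chained replacements
+// A -> B and B -> C recorded in ReplacedValues, remapping A first recursively
+// resolves B to C, rewrites the stored A entry to point directly at C, and
+// then returns C, so later lookups of A finish in a single step.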
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in which
+      // case this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW update!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+    // When recursively updating nodes with new nodes, it is possible for CSE
+    // to create new uses of From. If this happens, replace the new uses of
+    // From with To as well.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated; if so, assign node IDs as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
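+/// For example (illustrative): a v4f32 operand becomes a v4i32 with each
+/// lane reinterpreted bit-for-bit.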
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
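+/// CreateStackStoreLoad - Reinterpret Op's bits as DestVT by storing Op to a
+/// stack temporary aligned for both types and reloading the slot as DestVT.
+/// For example (illustrative), this can turn an i64 into the bits of an f64
+/// when no direct bitcast is legal.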
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is false, the node has legal result types but an illegal
+/// operand, and VT is the type of that illegal operand.
+/// If LegalizeResult is true, the node has an illegal result type, and VT is
+/// the type of that illegal result.
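+/// Illustrative sketch (the opcode is an assumption; the hooks are the
+/// TargetLowering ones used below): a target that marks ISD::FP_TO_SINT with
+/// an illegal result type as Custom has TLI.ReplaceNodeResults called to
+/// produce replacement values, which are then wired in via ReplaceValueWith.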
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
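+/// For example (illustrative): on a target where i32 is legal, an i64 input
+/// yields LoVT = HiVT = i32, and a v8i32 input yields LoVT = HiVT = v4i32.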
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+ // Currently all types are split in half.
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ } else {
+ unsigned NumElements = InVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
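+/// For example (illustrative): for an i64 Pair on a target whose transformed
+/// type is i32, Lo receives bits [31:0] and Hi bits [63:32], each as an i32.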
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
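+/// GetVectorElementPointer - Illustrative sketch of the address computation
+/// below (the concrete types are assumptions): for a pointer VecPtr to an
+/// in-memory <4 x i32> and an index Idx, the result is VecPtr + Idx * 4, the
+/// address of element Idx.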
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
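+/// For example (illustrative): joining an i32 Lo with an i32 Hi yields the
+/// i64 value (Hi << 32) | zext(Lo).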
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
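+/// For example (an illustrative assumption, not taken from this file): an
+/// FREM node with two f64 operands and an f64 result could be lowered with
+/// LibCallify(RTLIB::REM_F64, N, false), producing a call to an fmod-style
+/// routine with the same prototype as the node.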
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
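+/// Illustrative sketch (the libcall choice and the LHS/RHS/dl names are
+/// assumptions): a 64-bit signed divide on a 32-bit target could be emitted
+/// as
+///   SDValue Ops[2] = { LHS, RHS };
+///   SDValue Res = MakeLibCall(RTLIB::SDIV_I64, MVT::i64, Ops, 2,
+///                             /*isSigned=*/true, dl);
+/// which builds a call to the target's __divdi3-style runtime routine.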
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
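+/// For example (illustrative): with ZeroOrNegativeOneBooleanContent, an i1
+/// "true" promoted to i32 becomes all ones (sign extension), whereas with
+/// ZeroOrOneBooleanContent it becomes 1 (zero extension).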
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode;
+ switch (TLI.getBooleanContents()) {
+ default:
+ assert(false && "Unknown BooleanContent!");
+ case TargetLowering::UndefinedBooleanContent:
+ // Extend to VT by adding rubbish bits.
+ ExtendCode = ISD::ANY_EXTEND;
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // Extend to VT by adding zero bits.
+ ExtendCode = ISD::ZERO_EXTEND;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+ // Extend to VT by copying the sign bit.
+ ExtendCode = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
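+/// For example (illustrative): splitting an i64 Op with LoVT = HiVT = i32
+/// gives Lo = Op[31:0] and Hi = Op[63:32].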
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
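+// Usage note (illustrative; the exact call site is an assumption): the
+// instruction selector invokes this during DAG lowering, roughly as
+//   bool Changed = CurDAG->LegalizeTypes();
+// and may re-run DAG combining afterwards if any types were changed.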
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 000000000000..952797dc75b8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,720 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// used by the code that implements the SelectionDAG::LegalizeTypes method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+ // about the state of the node. The enum below lists the possible states.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not seen before, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
+private:
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ DenseMap<SDValue, SDValue> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded, this map
+ /// records the Lo/Hi pair that makes up the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ DenseMap<SDValue, SDValue> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded, this map
+ /// records the Lo/Hi pair that makes up the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+ /// SplitVectors - For vector nodes that need to be split, this map records
+ /// the Lo/Hi pair that makes up the split version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ DenseMap<SDValue, SDValue> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ DenseMap<SDValue, SDValue> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+ SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ DebugLoc dl);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into vectors
+ /// of half the size, this method returns the halves. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads to load
+ /// a vector with a resulting wider type. It takes:
+ ///   LdChain: list of chains for the loads to be generated.
+ ///   LD: load to widen.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads to load a vector with a resulting wider type. It takes:
+ ///   LdChain: list of chains for the loads to be generated.
+ ///   LD: load to widen.
+ ///   ExtType: extension element type.
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores to
+ /// store a widened vector into non-widened memory. It takes:
+ ///   StChain: list of chains for the stores we have generated.
+ ///   ST: store of a widened value.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// GenWidenVectorTruncStores - Helper function to generate a set of
+ /// truncating stores to store a widened vector into non-widened memory.
+ /// It takes:
+ ///   StChain: list of chains for the stores we have generated.
+ ///   ST: store of a widened value.
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+ /// ModifyToType - Modifies a vector input (widens or narrows it) to a vector
+ /// of WidenVT. The input vector must have the same element type as WidenVT.
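+ /// For example (illustrative): widening a v2i32 input to a v4i32 WidenVT
+ /// yields a v4i32 whose first two lanes are the input elements and whose
+ /// last two are undef; narrowing extracts the leading subvector.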
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 000000000000..85ea6b662044
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,480 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+ DAG.getIntPtrConstant(1));
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+ getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
+ false, MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
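+  // The expanded halves occupy elements 2*Idx and 2*Idx+1 of the new vector.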
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
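+  // Turn the node into a BUILD_VECTOR with the scalar as element 0 and undef
+  // for the remaining elements.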
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+ bool isNonTemporal = St->isNonTemporal();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
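+  // Store the two halves; both stores hang off the original chain, with the
+  // second offset by IncrementSize bytes from the first.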
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // A MERGE_VALUES node can produce any number of values. We know that the
+ // first illegal one needs to be expanded into Lo/Hi.
+ unsigned i;
+
+  // Each leading legal result is replaced by the corresponding input operand,
+  // which has the same type.
+ for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+ // The first illegal result must be the one that needs to be expanded.
+ GetSplitOp(N->getOperand(i), Lo, Hi);
+
+ // Legalize the rest of the results into the input operands whether they are
+ // legal or not.
+ unsigned e = N->getNumValues();
+ for (++i; i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
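+  // Split the two value operands; the condition operand is used unsplit for
+  // both halves.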
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+ Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 000000000000..ffff10ce2948
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,337 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if
+  // SINT_TO_FP and SRL on vectors aren't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // The legalize process is inherently a bottom-up recursive process (nodes
+  // legalize their operands before themselves). Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph. In
+  // practice, however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
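+  // Pick the type used to query the target's action for this operation:
+  // usually the result type, but int-to-FP conversions are keyed on the
+  // source type and FP_ROUND_INREG on the rounded-to type.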
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::VSETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::SIGN_EXTEND_INREG:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ case TargetLowering::Promote:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::VSETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = DAG.UnrollVectorOp(Op.getNode());
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ EVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
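+  // Split each unsigned element x into halves, x = HI * 2^(BW/2) + LO.  Both
+  // halves are non-negative when reinterpreted as signed BW-bit values, so
+  // each can be converted with SINT_TO_FP and the results recombined as
+  // float(x) = float(HI) * 2^(BW/2) + float(LO).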
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFFULL : 0x0000FFFFULL;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+  // Two to the power of half-word-size.
+  SDValue TWOHW = DAG.getConstantFP(double(1ULL << (BW / 2)),
+                                    Op.getValueType());
+
+  // Split the input: HI gets the value shifted down by half a word, LO gets
+  // the value with its upper half masked off.
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert HI and LO to floats, then scale the HI part back up to its
+  // original weight of 2^(BW/2).
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
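+  // Expand FNEG as (-0.0 - x); subtracting from -0.0 rather than +0.0 also
+  // flips the sign of a zero operand (0.0 - 0.0 yields +0.0).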
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
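+    // A vector SETCC yields an all-ones value for true in each lane, so
+    // widen the scalar SETCC result to all-ones/zero of the element type.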
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 000000000000..b5698f9c6738
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2568 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize the result of this operator!");
+
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc DL = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
+
+ // VSETCC always returns a sign-extended value, while SETCC may not. The
+ // SETCC result type may not match the vector element type. Correct these.
+ if (NVT.bitsLE(SVT)) {
+ // The SETCC result type is bigger than the vector element type.
+ // Ensure the SETCC result is sign-extended.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
+ DAG.getValueType(MVT::i1));
+ // Truncate to the final type.
+ return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
+ }
+
+ // The SETCC result type is smaller than the vector element type.
+ // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
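+  // EXTRACT_VECTOR_ELT may have a result type wider than the vector element
+  // type; any-extend the scalarized value in that case.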
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ case ISD::VSETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(VecType);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ SDValue LL, LH, RL, RH;
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(0));
+ RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ EVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: llvm_unreachable("Unexpected type action!");
+ case TargetLowering::TypeLegal: {
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypePromoteInteger: {
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
+ InOp.getValueType().getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ break;
+ case TargetLowering::TypeWidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                           &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to split this operator's operand!");
+
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType())), 0);
+ }
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc DL = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ LoMemVT, isVol, isNT, Alignment);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+ isVol, isNT, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVol, isNT, Alignment);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ isVol, isNT, Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands must all have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-of-two vectors, we could convert
+ // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::VSETCC:
+ Res = WidenVecRes_VSETCC(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ unsigned Opcode = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
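+ // Find the largest legal vector type (with the widened element type) that
+ // is no wider than WidenVT.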
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take chunks of size NumElts from the beginning and add them to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type: build a vector of type NextVT with INSERT_VECTOR_ELT nodes.
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
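+ // If the input is widened to the same element count as the result, the
+ // conversion can be applied directly to the widened input.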
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
+ // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widen value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
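+ // The bitcast was not handled above; try to express it as a bitcast from a
+ // wider vector, built from the input padded with undefs, that has the same
+ // size as WidenVT.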
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector, padding the extra elements with undef.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
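+ // Pad the mask with undef (-1) entries for the extra widened lanes.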
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely
+ // that we can fix the input to a legal type so unroll the convert
+ // into some scalar code and create a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenate vector with a
+ // nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+ // We have to widen the value but we want only to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for load/store.
+// TLI: Target lowering used to determine legal types.
+// Width: Width left to load/store.
+// WidenVT: The widened vector type to load to / store from.
+// Align: If 0, don't allow use of a wider type.
+// WidenEx: If Align is not 0, the additional amount we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
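+ // Integer candidates are visited from widest to narrowest; once they are no
+ // wider than the element type, stop looking.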
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger legal vector type to load/store that has the same
+ // element type and whose width evenly divides the width of WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operators to build the vector from.
+// [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
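+ // Seed the vector with the first load via SCALAR_TO_VECTOR, then insert the
+ // remaining loads one at a time, re-bitcasting whenever the load type
+ // changes.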
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust the insert position within the vector based on the new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines them into the widened
+ // vector type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference between widened and loaded widths.
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads unless the access is volatile.
+
+ // Find the vector type that we can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ }
+
+ // Load the vector using multiple loads, from the largest legal vector type
+ // down to scalars.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
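+ // Keep loading with progressively smaller legal types until the whole
+ // vector width has been covered, advancing the pointer past each piece.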
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewVTWidth) {
+ // The type we are currently using is too large; find a better-sized type.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ }
+
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
+ isNonTemporal, MinAlign(Align, Increment));
+ LdChain.push_back(LdOp.getValue(1));
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads contain vectors, build the result with CONCAT_VECTORS.
+ // All of the vectors used for the loads are power-of-2 sized, and the
+ // scalar loads can be combined to make a power-of-2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
+ LdEltVT, isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the
+ // same element type, or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
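+ // Repeatedly store the widest legal piece that fits in the remaining
+ // width, advancing the base pointer after each piece.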
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore the index to be relative to the original widened element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to chop up the
+ // widened vector and store the pieces. Instead, we extract each element
+ // and then store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widened vector type is bigger than the
+ // memory type we are storing to.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we cannot play the trick of chopping legal vector
+ // types and bitcasting to the right type. Instead, we unroll the store.
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
+ isVolatile, isNonTemporal, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ // Extract element i of the widened value for this store slot.
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+ // Note that InOp might have been widened so it might already have
+ // the right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
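+ // Widening by an integral factor can be done with a single CONCAT_VECTORS,
+ // padding with undef operands.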
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 000000000000..2dcb22957325
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, // value is the result of an expression
+ CONST = 1, // value is a constant
+ FRAMEIX = 2 // value is contents of a stack location
+ };
+private:
+ enum DbgValueKind kind;
+ union {
+ struct {
+ SDNode *Node; // valid for expressions
+ unsigned ResNo; // valid for expressions
+ } s;
+ const Value *Const; // valid for constants
+ unsigned FrameIx; // valid for stack objects
+ } u;
+ MDNode *mdPtr;
+ uint64_t Offset;
+ DebugLoc DL;
+ unsigned Order;
+ bool Invalid;
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+ unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ // Constructor for frame indices.
+ SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ kind = FRAMEIX;
+ u.FrameIx = FI;
+ }
+
+ // Returns the kind.
+ DbgValueKind getKind() { return kind; }
+
+ // Returns the MDNode pointer.
+ MDNode *getMDPtr() { return mdPtr; }
+
+ // Returns the SDNode* for a register ref
+ SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+
+ // Returns the ResNo for a register ref
+ unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+
+ // Returns the Value* for a constant
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
+
+ // Returns the FrameIx for a stack object
+ unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ // Returns the offset.
+ uint64_t getOffset() { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() { return DL; }
+
+ // Returns the SDNodeOrder. This is the order of the preceding node in the
+ // input.
+ unsigned getOrder() { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ // property. A SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
new file mode 100644
index 000000000000..f88b26d5c422
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNodeOrdering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEORDERING_H
+#define LLVM_CODEGEN_SDNODEORDERING_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class SDNode;
+
+/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
+/// SDNode that roughly corresponds to the ordering of the original LLVM
+/// instruction. This is used when scheduling is turned off, because we then
+/// forgo the normal scheduling algorithms and output the instructions
+/// according to this ordering.
+class SDNodeOrdering {
+ DenseMap<const SDNode*, unsigned> OrderMap;
+
+ void operator=(const SDNodeOrdering&); // Do not implement.
+ SDNodeOrdering(const SDNodeOrdering&); // Do not implement.
+public:
+ SDNodeOrdering() {}
+
+ void add(const SDNode *Node, unsigned O) {
+ OrderMap[Node] = O;
+ }
+ void remove(const SDNode *Node) {
+ DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+ if (Itr != OrderMap.end())
+ OrderMap.erase(Itr);
+ }
+ void clear() {
+ OrderMap.clear();
+ }
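+  // Note: for a node that was never added, operator[] default-constructs an
+  // entry, so getOrder returns 0.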
+ unsigned getOrder(const SDNode *Node) {
+ return OrderMap[Node];
+ }
+};
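+
+// Illustrative use only (N and CurOrder are hypothetical placeholders): the
+// DAG builder would assign a monotonically increasing order as it visits IR
+// instructions and query it later:
+//   SDNodeOrdering Ord;
+//   Ord.add(N, ++CurOrder);
+//   unsigned O = Ord.getOrder(N);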
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 000000000000..b275c6321ae4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,644 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their defining nodes
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
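+  // Nodes that produce or consume glue cannot be duplicated. A chain result
+  // (MVT::Other) may indicate a folded load, which we try to unfold instead
+  // of cloning.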
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+    NewSU = NewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
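+  // Implicit defs follow the explicit defs in the node's result list, so the
+  // result index for Reg is the number of explicit defs plus Reg's position
+  // in the implicit-def list.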
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+        // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is the same as RC, then it must
+        // be possible to copy the value directly. Do not try to duplicate
+        // the def.
+        // If the cross-copy register class is not the same as RC, then it
+        // is possible to copy the value, but it requires cross register
+        // class copies and it is expensive.
+        // If the cross-copy register class is null, then it is not possible
+        // to copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+          // Issue copies; these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 000000000000..430283d5eff9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,265 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", "Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(MachineFunction &mf,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SU, const SDep &D);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ AvailableQueue->initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, *I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
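+        // Remove the entry by swapping in the last element (pending-queue
+        // order is irrelevant) and re-examine index i.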
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall but no other problem; just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 000000000000..12b183804c28
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,2994 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ "Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+  // For sched=list-ilp, count the number of times each factor comes into play.
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+ FactStatic, FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their defining nodes
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<SUnit*> LiveRegGens;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
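+    // The default ScheduleHazardRecognizer does nothing (it reports itself
+    // as disabled); a target-specific recognizer is only created when the
+    // scheduler actually needs latency/hazard information.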
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGRRList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = NewSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool ForceUnitLatencies() const {
+ return !NeedLatency;
+ }
+};
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost) {
+ EVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+ unsigned Opcode = Node->getMachineOpcode();
+
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
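+      // Operand 0 of a REG_SEQUENCE node is the ID of the destination
+      // register class, encoded as a constant.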
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ FactorCount[i] = 0;
+ }
+#endif //!NDEBUG
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(NULL);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+ }
+#endif // !NDEBUG
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!ForceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+    // CapturePred and others may have left the node in the pending queue; avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[I->getReg()] = SU;
+ }
+ }
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency though such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->ScheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node. Check before overwriting so
+      // that NumLiveRegs stays consistent.
+      if (!LiveRegDefs[I->getReg()])
+        ++NumLiveRegs;
+      LiveRegDefs[I->getReg()] = SU;
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->UnscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Backtrack the schedule, unscheduling nodes up to and
+/// including BtSU, so that SU can be scheduled without the live physical
+/// register interference.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ if (SU->isSucc(OldSU))
+ // Don't try to remove SU from AvailableQueue.
+ SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ ComputeLatency(LoadSU);
+ }
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ ComputeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (isOperandOf(I->getSUnit(), LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
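+
+// Illustration of the unfolding path above, assuming a target that folds
+// loads into arithmetic (the mnemonics are hypothetical): a single SUnit for
+// "r1 = add [mem], r2" may be split by unfoldMemoryOperand into a LoadSU
+// ("r3 = load [mem]") and a new arithmetic SUnit ("r1 = add r3, r2"). Chain
+// predecessors, chain successors, and predecessors feeding the load address
+// move to LoadSU; value predecessors and value successors move to the new
+// SUnit; and a data edge from LoadSU carries the loaded value into it.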
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true);
+ AddPred(SuccSU, D);
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
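+
+// Example of the lookup above: if N has two explicit defs (result values 0
+// and 1) and an implicit def list of { R0, R1 } (hypothetical registers),
+// then a query for R1 skips past R0 and returns N->getValueType(3), since
+// implicit physreg defs are modeled as result values that follow the
+// explicit defs, in list order.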
+
+/// CheckForLiveRegDef - Add to LRegs (and RegAdded) any currently live
+/// registers that the specified register def of the specified SUnit clobbers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if Reg is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
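+
+// Note that getOverlaps(Reg) yields Reg itself plus every register aliasing
+// it, so a def of a sub- or super-register also counts as interference. For
+// example, on a target with nested registers such as x86, a def of EAX
+// interferes with a live AX or AL definition recorded for a different SUnit.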
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+ // If one or more successors has been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try to
+    // duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. If even that doesn't work, insert
+    // cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it's
+    // possible to copy the value, but it requires cross register class copies
+    // and it is expensive.
+    // If the cross copy register class is null, then it's not possible to
+    // copy the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ AvailableQueue->setCurCycle(CurCycle);
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // All leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSU = AvailableQueue->pop();
+
+ if (CurSU)
+ ScheduleNodeTopDown(CurSU);
+ ++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
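+
+// Worked example of the numbering above: a leaf with no non-chain
+// predecessors gets 1. A node whose non-chain predecessors carry numbers
+// {3, 1} gets 3 (the maximum), while one whose predecessors carry {2, 2}
+// gets 3 (the maximum plus one for each additional predecessor that ties
+// it), reflecting that evaluating equally "deep" subtrees needs an extra
+// register to hold an intermediate result.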
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors and does not lengthen
+    // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
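+
+// For example, with RegLimit[RCId] == 8 and RegPressure[RCId] == 7, a
+// predecessor register def in that class with Cost 1 makes (7 + 1) >= 8,
+// so the node is treated as high register pressure.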
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution of this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
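+
+// Sketch of the accounting above, assuming a single register class already at
+// its pressure limit: a predecessor whose defs are not yet fully covered by
+// scheduled uses contributes +1 per def in that class (scheduling SU would
+// extend their liveness upward), while each used def of SU itself in that
+// class contributes -1 (scheduling SU produces that value and ends its
+// bottom-up liveness). An SU with one of each nets a PDiff of 0.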
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+  // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
+
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling both of them would cause pipeline stalls, sort them
+  // according to their height.
+ if (LStall) {
+ if (!RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return 1;
+ }
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactStall]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ } else if (RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return -1;
+ }
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactHeight]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ }
+ else {
+      // If neither instruction stalls (!LStall && !RStall) then their height
+      // is already covered, so only their depth matters. We also reach
+ // this if both stall but have the same height.
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(++FactorCount[FactDepth]);
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency) {
+ DEBUG(++FactorCount[FactOther]);
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ DEBUG(++FactorCount[FactRegUses]);
+ #ifndef NDEBUG
+ const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority) {
+ DEBUG(++FactorCount[FactStatic]);
+ return LPriority > RPriority;
+ }
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+  // the same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist) {
+ DEBUG(++FactorCount[FactOther]);
+ return LDist < RDist;
+ }
+
+  // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch) {
+ DEBUG(++FactorCount[FactOther]);
+ return LScratch > RScratch;
+ }
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight()) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (left->getDepth() != right->getDepth()) {
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ DEBUG(++FactorCount[FactOther]);
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
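+    // When exactly one side can enable coalescing, that difference alone
+    // decides the comparison.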
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ DEBUG(++FactorCount[FactRegUses]);
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
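+    // A block that is its own successor forms a single-block loop.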
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
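+        // Removing the old edge shrank PredSU->Succs, so revisit this index
+        // on the next iteration.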
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address but only one is commutable, favor the one that's
+/// not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+ unsigned Limit) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ const SUnit *SuccSU = I->getSUnit();
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->getSUnit();
+ if (!PredSU->isScheduled)
+ if (++Sum > Limit)
+ return Sum;
+ }
+ }
+ return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res < 0;
+
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+ bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
+ unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+ unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
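+  // The helper above stops counting once the limit is exceeded, so "== 1"
+  // distinguishes exactly one unscheduled predecessor among all successors
+  // from zero or more than one.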
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus != RPriority+RBonus)
+ return LPriority+LBonus < RPriority+RBonus;
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ if (left->NumSuccsLeft != right->NumSuccsLeft)
+ return left->NumSuccsLeft > right->NumSuccsLeft;
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ TDRegReductionPriorityQueue *PQ =
+ new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 000000000000..71f07d6fa47a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,802 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "SDNodeDbgValue.h"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high-latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ DAG = dag;
+ ScheduleDAG::Run(bb, insertPos);
+}
+
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = NewSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
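+/// Only operand 2 of a CopyToReg user (the value being copied; the operands
+/// are chain, register, value) can carry such a dependency.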
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
+
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SmallVector<EVT, 4> VTs;
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
+
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (GlueDestNode)
+ Ops.push_back(Glue);
+
+ SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target-specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
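+/// For example, loads from Base, Base+4 and Base+8 on the same chain may be
+/// glued into a single cluster and emitted back to back.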
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+      // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddGlue(Load, InGlue, OutGlue, DAG);
+
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is glued to this node; if so, add it to the glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ ComputeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
+        // scheduler emits a copy from the physical register to a virtual
+        // register unless it requires a cross-class copy (cost < 0). That
+        // means we only treat an "expensive to copy" register dependency as
+        // a physical register dependency. This may change in the future
+        // though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if(isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpLatency, PhysReg);
+ if (!isChain && !UnitLatencies) {
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+ }
+
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+/// given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued-together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getValueType(DefIdx);
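+      // Leave DefIdx one past the value just found; GetIdx() compensates by
+      // returning DefIdx-1.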
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+      // FIXME: need a target-specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+}
+
+namespace {
+ struct OrderSorter {
+ bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+ const std::pair<unsigned, MachineInstr*> &B) {
+ return A.first < B.first;
+ }
+ };
+}
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+  // order number right after N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if the node does not have an order
+    // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ GluedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+ Seen);
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ while (DI != DE) {
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+      MachineBasicBlock::iterator Pos = Emitter.getBlock()->getFirstTerminator();
+ if (!(*DI)->isInvalidated()) {
+        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI)
+ InsertBB->insert(Pos, DbgMI);
+ }
+ ++DI;
+ }
+ }
+
+ BB = Emitter.getBlock();
+ InsertPos = Emitter.getInsertPos();
+ return BB;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 000000000000..9c27b2ea02ec
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,165 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+    /// given as input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// flagged-together nodes with a single SUnit.
+ virtual void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const { }
+
+ virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..35ea0bb940b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,6655 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeOrdering.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // PPC long double cannot be converted to any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDValue NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+ bitcastToAPInt().isAllOnesValue())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements.
+ SDValue Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one 0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is an
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
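+/// For example, SETOGT becomes SETOLT and SETULE becomes SETUGE.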
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
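+/// For example, the integer inverse of SETULT is SETUGE, and the FP inverse
+/// of SETOLT is SETUGE (the unordered case becomes true).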
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
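+/// For example, ORing SETGT with SETLT yields SETNE for integers, and ORing
+/// SETOGT with SETOLT yields SETONE for floating point.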
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
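+/// For example, ANDing SETUGE with SETULE yields SETUEQ, which is then
+/// canonicalized to SETEQ for integer operands.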
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+ ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+ // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
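+/// For example, with ConvType and AM both zero, a volatile non-temporal
+/// access encodes as (1 << 5) | (1 << 6) = 0x60.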
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ (isNonTemporal << 6);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+ DAGUpdateListener *UpdateListener) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+ RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ Ordering->remove(N);
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
+ for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+ DbgVals[i]->setIsInvalidated();
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+ DAGUpdateListener *UpdateListener) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+ // N is now dead. Inform the listener if it exists and delete it.
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If an identical node didn't already exist, N itself was updated in place.
+ // Inform the listener, if one exists.
+ if (UpdateListener)
+ UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ return Node;
+}
+
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ break;
+ }
+ }
+}
+
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ const Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm)
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0) {
+ AllNodes.push_back(&EntryNode);
+ Ordering = new SDNodeOrdering();
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf) {
+ MF = &mf;
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ allnodes_clear();
+ delete Ordering;
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+ Ordering->clear();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
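+/// getZeroExtendInReg - A brief summary (derived from the code below): return
+/// Op with the bits above VT's width masked to zero; e.g. for an i32 Op and
+/// VT == MVT::i8 this amounts to (and Op, 255).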
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
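+ // For vector types, splat the scalar constant into a BUILD_VECTOR; e.g.
+ // getConstant(1, MVT::v4i32) returns a BUILD_VECTOR with four i32 1 operands.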
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else if (EltVT==MVT::f64)
+ return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else {
+ assert(0 && "Unsupported type in getConstantFP");
+ return SDValue();
+ }
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
+ EVT VT, int64_t Offset,
+ bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ EVT PTy = TLI.getPointerTy();
+ unsigned BitWidth = PTy.getSizeInBits();
+ if (BitWidth < 64)
+ Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
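+ // (e.g. with 32-bit pointers an offset of 0xFFFFFFFF becomes -1).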
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
+ Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
+ Alignment, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and changes all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
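+// For example, with 4-element vectors the mask <0, 5, 2, 7> becomes
+// <4, 1, 6, 3> once N1 and N2 have been exchanged.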
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Vector Shuffle VTs must be a vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
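+ // (e.g. with 4 elements, mask <0, 4, 1, 5> becomes <0, 0, 1, 1>).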
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+ // Canonicalize all indices into lhs -> shuffle lhs, undef
+ // Canonicalize all indices into rhs -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+ // If this is an identity shuffle return the input vector; if every element
+ // shuffles to undef, return an UNDEF node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N =
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5,
+ Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddPointer(Label);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(BA);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ const Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ const Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const TargetData *TD = TLI.getTargetData();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128)
+ return SDValue();
+
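+ // For example, if either constant is a NaN the compare is unordered:
+ // SETLT then folds to undef, SETULT to 1, and SETOLT to 0.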
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) const {
+ unsigned BitWidth = Mask.getBitWidth();
+ assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
+ "Mask size mismatches value type size!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
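+ // For example, two operands each known to be a multiple of 4 yield a
+ // product with at least four known trailing zero bits.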
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ APInt InDemandedMask = (Mask << ShAmt);
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ if (HighBits.getBoolValue())
+ InDemandedMask |= APInt::getSignBit(BitWidth);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit = InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clearAllBits();
+ return;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask.trunc(InBits);
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded. Temporarily set this bit in the mask for our callee.
+ if (NewBits.getBoolValue())
+ InMask |= InSignBit;
+
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ // If the sign bit wasn't actually demanded by our caller, we don't
+ // want it set in the KnownZero and KnownOne result values. Reset the
+ // mask and reapply it to the result values.
+ InMask = Mask.trunc(InBits);
+ KnownZero &= InMask;
+ KnownOne &= InMask;
+
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
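+ // Continuing that example in an i32: C+1 == 21 has NLZ == 27 leading
+ // zeros, so MaskV below asks whether the top 28 bits of X are known zero
+ // (i.e. X < 16); if so, 20-X lies in [5, 20] and its top 27 bits are
+ // known zero.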
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth here, since C is non-negative and so C+1 is non-zero.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD:
+ case ISD::ADDE: {
+ // The low bits of the output are known zero out to the shorter of the two
+ // operands' runs of known-zero low bits (no carry can enter that range).
+ // For example, 8+(X<<3) is known to have the low 3 bits clear.
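+ // For instance, if one operand has its low four bits known zero but the
+ // other only its low two, just the low two bits of the sum are known zero.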
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
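+ // For example, if both operands have their low four bits known zero, the
+ // incoming carry can only affect bit 0, so bits 1..3 of the result are
+ // still known zero.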
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
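+ // For example, for X srem 8 (LowBits == 7) the low three result bits are
+ // exactly the low three bits of X.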
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
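+ // For example, X urem 16 can only produce values 0..15, so everything
+ // above the low four bits is known zero.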
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
+ default:
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
+ return;
+ }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
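+ // For example, an AssertSext from i8 inside an i32 guarantees
+ // 32-8+1 == 25 copies of the sign bit.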
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents() ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by VTBits-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
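+ // For example, adding two operands that each have at least four sign bits
+ // yields a result with at least three sign bits.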
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16 sextload to i32: 17 sign bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // e.g. i16 zextload to i32: 16 sign bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
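+ // For example, if the top 12 bits are known zero, ~Mask has at least 12
+ // leading zeros after the shift below, so at least 12 sign bits are
+ // reported.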
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return the number of leading zeros. We use 'min' here in case the value
+ // was zero before shifting; we don't want to return 64 for an i32 '0'.
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst == 0.
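+/// For example, (or X, 3) computes the same value as (add X, 3) whenever the
+/// low two bits of X are known to be zero.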
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ // No compile time operations on ppcf128.
+ if (VT == MVT::ppcf128) break;
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(Val.bitsToFloat(), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(Val.bitsToDouble(), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), 2, x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else
+ return Operand.getNode()->getOperand(0);
+ }
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+ EVT VT,
+ ConstantSDNode *Cst1,
+ ConstantSDNode *Cst2) {
+ const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+ break;
+ case ISD::UREM:
+ if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+ break;
+ case ISD::SDIV:
+ if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+ break;
+ case ISD::SREM:
+ if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+ break;
+ case ISD::AND: return getConstant(C1 & C2, VT);
+ case ISD::OR: return getConstant(C1 | C2, VT);
+ case ISD::XOR: return getConstant(C1 ^ C2, VT);
+ case ISD::SHL: return getConstant(C1 << C2, VT);
+ case ISD::SRL: return getConstant(C1.lshr(C2), VT);
+ case ISD::SRA: return getConstant(C1.ashr(C2), VT);
+ case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+ case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+ default: break;
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmountTy().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ EVT VEltTy = N1.getValueType().getVectorElementType();
+ if (Elt.getValueType() != VEltTy) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+ }
+ if (VT != VEltTy) {
+ // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+ // result is implicitly extended.
+ Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+ }
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element else
+ // if the indices are known different, extract the element from
+ // the original vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
+ break;
+ }
+ }
+
+ if (N1C) {
+ if (N2C) {
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+ if (SV.getNode()) return SV;
+ } else { // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP && VT != MVT::ppcf128) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all-zeros vector; just return
+ // the other (non-undef) operand.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ break;
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+/// SplatByte - Distribute ByteVal over NumBits bits.
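+/// For example, SplatByte(32, 0xAB) produces 0xABABABAB.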
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Value.getOpcode() != ISD::UNDEF);
+
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
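+ // For example, for an i32 memset value the zero-extended byte 0xAB is
+ // multiplied by 0x01010101, yielding 0xABABABAB.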
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ }
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64)
+ return DAG.getConstantFP(0.0, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumBits = VT.getSizeInBits();
+ unsigned MSB = NumBits / 8;
+ uint64_t Val = 0;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns an ADD node computing Base plus a constant
+/// Offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+ return true;
+
+ return false;
+}
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ NonScalarIntSafe, MemcpyStrSrc,
+ DAG.getMachineFunction());
+
+ if (VT == MVT::Other) {
+ if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+ TLI.allowsUnalignedMemoryAccesses(VT)) {
+ VT = TLI.getPointerTy();
+ } else {
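+ // Pick the widest type whose natural alignment the destination satisfies;
+ // e.g. DstAlign == 4 selects i32 below and DstAlign == 2 selects i16.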
+ switch (DstAlign & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize = VT.getSizeInBits() / 8;
+ } else {
+ // This can result in a type that is not legal on the target, e.g.
+ // 1 or 2 bytes on PPC.
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ VTSize >>= 1;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memcpy of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than maybe a humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ std::string Str;
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign),
+ true, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ } else {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
+ MinAlign(SrcAlign, SrcOff));
+ Store = DAG.getTruncStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memmove of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ SrcAlign, true, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ // Turn a memset of undef to nop.
+ if (Src.getOpcode() == ISD::UNDEF)
+ return Chain;
+
+  // Expand memset to a series of store ops if the size operand falls below
+  // a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool NonScalarIntSafe =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ NonScalarIntSafe, false, DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
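+    // For example (illustrative): with a fill byte of 0xAB the i32 pattern is
+    // 0xABABABAB, and when the target's i32->i16 truncate is free the i16
+    // store simply reuses 0xABAB instead of rematerializing the splat.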
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
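+// Illustrative use of getMemcpy (hypothetical values DAG, Ch, DstPtr, SrcPtr,
+// DstV, SrcV and a DebugLoc dl; not part of the interface above):
+//   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+//   SDValue Len = DAG.getConstant(32, PtrVT);
+//   SDValue Copy = DAG.getMemcpy(Ch, dl, DstPtr, SrcPtr, Len, /*Align=*/4,
+//                                /*isVol=*/false, /*AlwaysInline=*/false,
+//                                MachinePointerInfo(DstV),
+//                                MachinePointerInfo(SrcV));
+// A constant, in-limit size is expanded inline; otherwise this falls back to
+// target-specific code or a libc call, as above.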
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
+ /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
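+// Illustrative use of getMemset (hypothetical values DAG, Ch, DstPtr, DstV
+// and a DebugLoc dl; not part of the interface above): zeroing a 64-byte
+// object:
+//   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+//   SDValue Zero = DAG.getConstant(0, MVT::i8);
+//   Ch = DAG.getMemset(Ch, dl, DstPtr, Zero, DAG.getConstant(64, PtrVT),
+//                      /*Align=*/16, /*isVol=*/false,
+//                      MachinePointerInfo(DstV));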
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ EVT VT = Cmp.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Cmp, Swp, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
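+// Illustrative use (hypothetical values DAG, Chain, Ptr, Cmp, Swp, PtrV and a
+// DebugLoc dl): an i32 compare-and-swap; result 0 is the loaded value and
+// result 1 the output chain:
+//   SDValue CAS = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32, Chain,
+//                               Ptr, Cmp, Swp, MachinePointerInfo(PtrV),
+//                               /*Alignment=*/4);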
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  // For now, atomics are always considered to be volatile.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, Val, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, PtrInfo, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachinePointerInfo PtrInfo,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ unsigned Flags = 0;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+  // Memoize the node unless it produces a glue result.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps,
+ MemVT, MMO);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
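+// Illustrative use (hypothetical operands DAG, Chain, IntID, Ptr, PtrV and a
+// DebugLoc dl): a chained target intrinsic that only reads 16 bytes:
+//   EVT RetVTs[] = { MVT::v4i32, MVT::Other };
+//   SDValue Ops[] = { Chain, IntID, Ptr };
+//   SDValue R =
+//     DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, RetVTs, 2, Ops, 3,
+//                             MVT::v4i32, MachinePointerInfo(PtrV),
+//                             /*Align=*/16, /*Vol=*/false, /*ReadMem=*/true,
+//                             /*WriteMem=*/false);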
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
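+// For example (illustrative): a pointer built as FrameIndex#3 plus a constant
+// offset of 8 yields MachinePointerInfo::getFixedStack(3, 8), so later passes
+// can still reason about the spill slot even without an IR Value.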
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+  // If the 'Offset' value is a constant, fold it in; an undef offset is
+  // treated as zero. Anything else we can't handle.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an extending load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an extending load to change the number of vector "
+           "elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+ MemVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+}
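+// Illustrative use (hypothetical values DAG, Chain, Ptr, SV and a DebugLoc
+// dl):
+//   SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr,
+//                           MachinePointerInfo(SV), /*isVolatile=*/false,
+//                           /*isNonTemporal=*/false, /*Alignment=*/4,
+//                           /*TBAAInfo=*/0);
+//   Chain = L.getValue(1);  // the new memory chain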
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
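+// Illustrative use (hypothetical values DAG, Chain, Val, Ptr, SV and a
+// DebugLoc dl):
+//   Chain = DAG.getStore(Chain, dl, Val, Ptr, MachinePointerInfo(SV),
+//                        /*isVolatile=*/false, /*isNonTemporal=*/false,
+//                        /*Alignment=*/4, /*TBAAInfo=*/0);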
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ false, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
+
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
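+// Illustrative use (hypothetical values DAG, Chain, Val, Ptr, SV and a
+// DebugLoc dl): storing only the low 16 bits of an i32 Val:
+//   Chain = DAG.getTruncStore(Chain, dl, Val, Ptr, MachinePointerInfo(SV),
+//                             MVT::i16, /*isVolatile=*/false,
+//                             /*isNonTemporal=*/false, /*Alignment=*/2,
+//                             /*TBAAInfo=*/0);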
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+ true, SVT, MMO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(),
+ ST->getMemoryVT(),
+ ST->getMemOperand());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const EVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+  // Memoize the node unless it produces a glue result.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1],
+ Ops[2]);
+ } else {
+ N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: llvm_unreachable("Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
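+// Illustrative use (hypothetical load node Ld and chain NewChain): re-chaining
+// a load in place without rebuilding it:
+//   SDValue NewOps[] = { NewChain, Ld->getOperand(1), Ld->getOperand(2) };
+//   SDNode *Res = DAG.UpdateNodeOperands(Ld, NewOps, 3);
+//   // Res may be an existing CSE'd node rather than Ld itself.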
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
+}
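+
+// Note that the machine opcode is handed to MorphNodeTo in complemented form
+// (~MachineOpc), matching the ~Opcode encoding used by getMachineNode below.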
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+ // Initialize the memory references information.
+ MN->setMemRefs(0, 0);
+ // If NumOps is larger than the # of operands we can have in a
+ // MachineSDNode, reallocate the operand list.
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+ if (MN->OperandsNeedDelete)
+ delete[] MN->OperandList;
+ if (NumOps > array_lengthof(MN->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->OperandsNeedDelete = false;
+ } else
+ MN->InitOperands(MN->OperandList, Ops, NumOps);
+ } else {
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->OperandsNeedDelete = true;
+ } else
+ N->InitOperands(N->OperandList, Ops, NumOps);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, 0, 0);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+ EVT VT2, EVT VT3, EVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<EVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+}
+
+MachineSDNode *
+SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = 0;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
+ IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return cast<MachineSDNode>(E);
+ }
+
+ // Allocate a new MachineSDNode.
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs);
+
+ // Initialize the operands list.
+ if (NumOps > array_lengthof(N->LocalOperands))
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+ Ops, NumOps);
+ else
+ N->InitOperands(N->LocalOperands, Ops, NumOps);
+ N->OperandsNeedDelete = false;
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyMachineNode(N);
+#endif
+ return N;
+}
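+// Illustrative use (hypothetical register-class id RCID, value V, a DebugLoc
+// dl and a SelectionDAG DAG):
+//   MachineSDNode *MN =
+//     DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, MVT::i32, V,
+//                        DAG.getTargetConstant(RCID, MVT::i32));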
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue
+SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// getDbgValue - Creates a SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+/// This class also manages a "downlink" DAGUpdateListener, to forward
+/// messages to ReplaceAllUsesWith's callers.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::DAGUpdateListener *DownLink;
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+
+ // Then forward the message.
+ if (DownLink) DownLink->NodeDeleted(N, E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Just forward the message.
+ if (DownLink) DownLink->NodeUpdated(N);
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+ DAGUpdateListener *UpdateListener) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+  assert(From != To.getNode() &&
+         "Cannot replace uses of a value with itself");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
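+
+// Illustrative usage (a sketch; OldNode, A, B, DL and VT are hypothetical):
+// after building a simplified replacement, a caller redirects every user of
+// the old single-result node to it:
+//
+//   SDValue New = DAG.getNode(ISD::ADD, DL, VT, A, B);
+//   DAG.ReplaceAllUsesWith(SDValue(OldNode, 0), New, /*UpdateListener=*/0);
+//
+// Users that become identical to existing nodes are merged through the CSE
+// maps, and uses created by that merging are deliberately not revisited (see
+// the PR3018 note above).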
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDValue *To,
+ DAGUpdateListener *UpdateListener) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+ DAGUpdateListener *UpdateListener){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, UpdateListener);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, &Listener);
+ }
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num,
+ DAGUpdateListener *UpdateListener){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+ // Read up all the uses and make records of them. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
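+
+// Illustrative usage (a sketch; From0/From1 and To0/To1 are hypothetical
+// SDValues): several values, possibly from different nodes, can be rewritten
+// in one pass:
+//
+//   SDValue FromVals[] = { From0, From1 };
+//   SDValue ToVals[]   = { To0, To1 };
+//   DAG.ReplaceAllUsesOfValuesWith(FromVals, ToVals, 2, 0);
+//
+// Recording the uses up front and sorting them by user keeps each user's
+// CSE-map removal and re-insertion down to a single round trip.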
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. The nodes in the AllNodes list are
+/// reordered to match, and the number of nodes (one past the largest id) is
+/// returned.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ checkForCycles(N);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands, add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ checkForCycles(N);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (I == SortedPos) {
+#ifndef NDEBUG
+ SDNode *S = ++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
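+
+// Worked example (a sketch of the algorithm above): for a DAG containing the
+// EntryToken, two Constants and an ADD of those constants, the first pass
+// moves the three operand-less nodes to the front (ids 0, 1, 2) and stashes
+// the ADD's operand count (2) in its NodeId. The second pass visits the
+// sorted nodes in order; each Constant decrements the ADD's outstanding
+// count, and once it reaches zero the ADD is spliced in at SortedPos with
+// id 3.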
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+ assert(SD && "Trying to assign an order to a null node!");
+ Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+ assert(SD && "Trying to get the order of a null node!");
+ return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
+ if (SD)
+ SD->setHasDebugValue(true);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
+ EVT VT, int64_t o, unsigned char TF)
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
+ MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this node is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
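+
+// Illustrative usage (a sketch; Chain and Dest are hypothetical chain
+// values): callers typically use this before folding or reordering across a
+// chain, e.g.
+//
+//   bool Safe = Chain.reachesChainWithoutSideEffects(Dest, 2);
+//
+// Because the walk is depth-limited, a false result only means the property
+// could not be proven within the limit, not that a side effect necessarily
+// exists on the path.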
+
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
+ if (Visited.insert(Op))
+ Worklist.push_back(Op);
+ if (Op == N)
+ return true;
+ }
+ }
+
+ return false;
+}
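+
+// Illustrative usage (a sketch; N, A and B are hypothetical SDNode pointers):
+// because Visited and Worklist are passed by reference, a caller testing one
+// node against several candidate predecessors can reuse the partially
+// explored state instead of restarting the walk each time:
+//
+//   SmallPtrSet<const SDNode *, 32> Visited;
+//   SmallVector<const SDNode *, 16> Worklist;
+//   bool HasA = N->hasPredecessorHelper(A, Visited, Worklist);
+//   bool HasB = N->hasPredecessorHelper(B, Visited, Worklist);
+//
+// hasPredecessor above is the single-query convenience wrapper.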
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->get(getMachineOpcode()).getName();
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE:
+ return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP:return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Trampolines
+ case ISD::TRAMPOLINE: return "trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+ std::string S = "< ";
+
+ if (isZExt())
+ S += "zext ";
+ if (isSExt())
+ S += "sext ";
+ if (isInReg())
+ S += "inreg ";
+ if (isSRet())
+ S += "sret ";
+ if (isByVal())
+ S += "byval ";
+ if (isNest())
+ S += "nest ";
+ if (getByValAlign())
+ S += "byval-align:" + utostr(getByValAlign()) + " ";
+ if (getOrigAlign())
+ S += "orig-align:" + utostr(getOrigAlign()) + " ";
+ if (getByValSize())
+ S += "byval-size:" + utostr(getByValSize()) + " ";
+ return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent)
+{
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+
+ dbgs() << "\n";
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i= 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, TLI.getPointerTy()));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
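+
+// Illustrative behaviour (a sketch): unrolling a <4 x f32> FADD with
+// ResNE == 0 produces four scalar FADDs, each fed by EXTRACT_VECTOR_ELT of
+// the original operands, reassembled with a BUILD_VECTOR of four elements:
+//
+//   SDValue Unrolled = DAG.UnrollVectorOp(N, 0);   // N is the vector FADD
+//
+// Passing a ResNE larger than the source width pads the tail with UNDEF
+// elements; a smaller ResNE unrolls only the first ResNE lanes.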
+
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
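+
+// Worked example (a sketch): if Base loads 4 bytes from GV+8 and LD loads
+// 4 bytes from GV+12 on the same chain, isConsecutiveLoad(LD, Base, 4, 1)
+// returns true because Offset1 (12) == Offset2 (8) + Dist (1) * Bytes (4).
+// A negative Dist asks whether LD sits that many slots before Base.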
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ // If GV has specified alignment, then use it. Otherwise, use the preferred
+ // alignment.
+ unsigned Align = GV->getAlignment();
+ if (!Align) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->hasInitializer()) {
+ const TargetData *TD = TLI.getTargetData();
+ Align = TD->getPreferredAlignment(GVar);
+ }
+ }
+ }
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
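+
+// Worked example (a sketch): for a pointer of the form GV+12 where GV has
+// (or prefers) 16-byte alignment, MinAlign(16, 12) yields 4, so only 4-byte
+// alignment can be inferred. A plain FrameIndex returns the stack object's
+// alignment, FI+Cst folds the constant offset in the same way, and anything
+// else returns 0 ("unknown").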
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS << std::string(indent, ' ');
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
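+
+// Worked example (a sketch): a <4 x i8> build_vector of four constant 0xAB
+// elements starts with sz == 32 and SplatValue == 0xABABABAB. Each halving
+// step finds matching halves, so the loop stops at sz == 8 and reports
+// SplatBitSize == 8 with SplatValue == 0xAB. If one element were undef, its
+// bits would land in SplatUndef, be ignored by the comparison, and set
+// HasAnyUndefs.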
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
+ dbgs() << "Offending node:\n";
+ N->dumprFull();
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
+ }
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+ checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 000000000000..81b03ee76a5c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6602 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SDNodeDbgValue.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts,
+ unsigned NumParts, EVT PartVT, EVT ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL,
+ Parts + RoundParts, OddParts, PartVT, OddVT);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, DL, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(1));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ llvm_unreachable("Unknown mismatch!");
+ return SDValue();
+}
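+
+// Worked example (a sketch): assembling an i64 from two i32 parts bitcasts
+// each part to the i32 half type, swaps them on big-endian targets and joins
+// them with BUILD_PAIR. For a non-power-of-2 split such as three i32 parts
+// forming an i96 on a little-endian target, the first two parts become the
+// 64-bit "round" half and the odd third part is any-extended, shifted left
+// by 64 and ORed in.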
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
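+    // For example, on a target where a <8 x i16> value is lowered as two
+    // legal <4 x i16> registers, the breakdown gives NumIntermediates = 2,
+    // IntermediateVT = <4 x i16>, RegisterVT = <4 x i16>, and NumRegs = 2.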
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartVT) {
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover fewer bits than the value has, truncate the value.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
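+  // For example, with four parts the first pass splits the value into two
+  // halves at Parts[0] and Parts[2]; the second pass splits each half into
+  // the final quarter-sized parts.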
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+      // Vector widening case, e.g. <2 x float> -> <4 x float>. Pad the
+      // value out with undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ bool Smaller = PartVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+    } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ IntermediateVT, Val,
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ }
+}
+
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
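+  ///
+  /// For example, an i128 value on a target whose widest legal integer
+  /// register type is i32 would have ValueVTs = {i128}, RegVTs = {i32},
+  /// and four entries in Regs.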
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+    /// With virtual registers, the contents of RegVTs are redundant with
+    /// TLI's getRegisterType member function; with physical registers,
+    /// however, a separate record of the types is necessary.
+ ///
+ SmallVector<EVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ EVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
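+      // For example, if the defining block proved that only the low 8 bits
+      // of an i32 virtual register can be nonzero, an AssertZext from i8 is
+      // attached so later combines can rely on the known zero bits.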
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
+
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Get the list of the values' legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker, matching input operand index
+/// (if applicable), and includes the number of values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
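+  // The flag word encodes the operand kind and the number of registers in
+  // this operand; for a tied operand it also records the index of the
+  // matching inline asm operand.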
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+ AA = &aa;
+ GFI = gfi;
+ TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurDebugLoc = DebugLoc();
+ HasTailCall = false;
+}
+
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from clear() so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
+ if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
+ DAG.AssignOrdering(Node, SDNodeOrder);
+
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
+ AssignOrderingToNode(Node->getOperand(I).getNode());
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+#include "llvm/Instruction.def"
+ }
+
+ // Assign the ordering to the freshly created DAG nodes.
+ if (NodeMap.count(&I)) {
+ ++SDNodeOrder;
+ AssignOrderingToNode(getValue(&I).getNode());
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return DAG.getConstant(0, TLI.getPointerTy());
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
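+      // For example, a constant of type {i32, [2 x float]} is flattened
+      // into three leaf SDValues here.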
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+
+ return DAG.getMergeValues(&Constants[0], NumElts,
+ getCurDebugLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ const VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+ return SDValue();
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
+ Chains[i] =
+ DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+ Outs, OutVals, getCurDebugLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return the branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
+ BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ uint32_t weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, weight);
+}
+
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ llvm_unreachable("Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression whose top-level opcode
+/// matches Opc (a one-use Or or And of exportable subconditions), recursively
+/// emit branches for its operands; otherwise emit Cond itself as a leaf
+/// branch via EmitBranchForMergedCondition.
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = BrMBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0], BrMBB);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+  // Insert the false branch. Do this even if it's a fall-through branch;
+  // this makes it easier to do DAG optimizations that require inverting
+  // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce
+/// an index into the jump table from the value being switched on.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the lowest switch case value from the value being switched on and
+  // conditionally branch to the default MBB if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+  // therefore require extension or truncation.
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()), Sub,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+        // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
+
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, Sub);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB != NextBlock)
+ BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
+ } else {
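+    // General case: materialize (1 << ShiftOp) and AND it with the mask; the
+    // branch to the target block is taken whenever the selected bit is set,
+    // e.g. for mask 0b1010 when the adjusted switch value is 1 or 3.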
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
+ ISD::SETNE);
+ }
+
+ addSuccessorWithWeight(SwitchBB, B.TargetBB);
+ addSuccessorWithWeight(SwitchBB, NextMBB);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = SwitchBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ if (NextMBB != NextBlock)
+ BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ InvokeMBB->addSuccessor(Return);
+ InvokeMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != FuncInfo.MF->end())
+ NextBlock = BBI;
+
+ // If any two of the cases have the same destination, and one case value is
+ // the same as the other except for a single bit that the other has set, use
+ // bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
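+ // For the example above: Small == 4 (0b100), Big == 6 (0b110), so the
+ // popcounts are 1 and 2, (4 | 6) == 6, and CommonBit == 6 & ~4 == 2,
+ // which yields the test ((X | 2) == 6).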
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ SwitchBB->addSuccessor(Small.BB);
+ SwitchBB->addSuccessor(Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+ // This is just a small case range containing exactly one case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return !DisableJumpTables &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
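+ // Work at BitWidth+1 bits so the subtraction below cannot overflow: e.g. for
+ // i8 cases spanning -128..127 the range is 256, which needs 9 bits.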
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize.ult(4))
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
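+ // For example, 5 cases covering the values 10..40 give TSize = 5 and
+ // Range = 31, so Density is roughly 0.16 and no jump table is emitted;
+ // 10 contiguous cases give Density = 1.0 and pass this check.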
+ if (Density < 0.4)
+ return false;
+
+ DEBUG(dbgs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range
+ << ". Size: " << TSize << ". Density: " << Density << "\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.sle(TEI) && TEI.sle(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
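+ // For example, with clusters [1,4] -> A and [6,6] -> B (First = 1, Last = 6),
+ // this builds DestBBs = { A, A, A, A, Default, B }.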
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ addSuccessorWithWeight(JumpTableBB, *I);
+ }
+ }
+
+ // Create a jump table index for this jump table.
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+ ->createJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select the optimal pivot, maximizing the summed density of the LHS and RHS.
+ // This will (heuristically) allow us to emit jump tables later.
+ APInt TSize(First.getBitWidth(), 0);
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
+ DEBUG(dbgs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. that use 80-bit X87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(dbgs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
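+ // For example, for the single-value cases {0,1,2,3,100} the split between 3
+ // and 100 has LDensity == RDensity == 1.0 and Range == 98, giving a Metric of
+ // about 12, far larger than any split inside the dense 0..3 run, so the pivot
+ // lands on 100 and the left subtree stays jump-table friendly.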
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on (SV) is less than C.
+ // Otherwise, branch to the RHS node.
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - if the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into a
+/// series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
+ EVT PTy = TLI.getPointerTy();
+ unsigned IntPtrBits = PTy.getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = FuncInfo.MF;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+ // A single case counts as one comparison, a case range as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+ // Don't bother with the code below if there are too many unique destinations.
+ return false;
+ }
+ DEBUG(dbgs() << "Total number of unique destinations: "
+ << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(IntPtrBits) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(dbgs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+ // If all the case values are already in the range [0, IntPtrBits), the bit
+ // masks can be built without subtracting minValue, so skip the subtraction.
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+ assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
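+ // For example, if the values 1, 3, 5, 7 and 9 branch to one block and 4 to
+ // another (with lowBound 0), the masks built above are 0x2AA and 0x010.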
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(dbgs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ size_t numCmps = 0;
+
+ // Start with "simple" cases
+ for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters.
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
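+ // For example, cases 1 -> A, 2 -> A, 3 -> A, 7 -> B are merged into the
+ // clusters [1,3] -> A and [7,7] -> B, for a total of 3 comparisons below.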
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ SwitchMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ numCmps = 0;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ const Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the range has few cases (three or fewer) emit a series of specific
+ // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // If the switch has at least four cases, is at least 40% dense, and the
+ // target supports indirect branches, then emit a jump table rather than
+ // lowering the switch to a binary tree of conditional branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+ continue;
+
+ // Emit a binary tree. We need to pick a pivot, and push left and right ranges
+ // onto the worklist. Leaves are handled via handleSmallSwitchRange() calls.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallVector<BasicBlock*, 32> succs;
+ succs.reserve(I.getNumSuccessors());
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+ succs.push_back(I.getSuccessor(i));
+ array_pod_sort(succs.begin(), succs.end());
+ succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
+ for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
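+ // On a target whose shift-amount type is i8 (x86, for example), an i32 shift
+ // amount is truncated here, since 8 >= Log2_32_Ceil(32) == 5 means i8 can
+ // still represent every legal shift of an i32 value.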
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Cond,
+ SDValue(TrueVal.getNode(),
+ TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(),
+ FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // ZExt also can't be a cast to bool for the same reason, so there is nothing
+ // much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // SExt also can't be a cast to bool for the same reason, so there is nothing
+ // much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I){
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ DestVT, N)); // convert types.
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if every defined element of
+// the mask equals its position plus SIndx (undefs are allowed), i.e. the mask
+// selects a sequential run of elements starting at SIndx.
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+ unsigned MaskNumElts = Mask.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+ return false;
+ return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SmallVector<int, 8> Mask;
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ // Convert the ConstantVector mask operand into an array of ints, with -1
+ // representing undef values.
+ SmallVector<Constant*, 8> MaskElts;
+ cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ unsigned MaskNumElts = MaskElts.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (isa<UndefValue>(MaskElts[i]))
+ Mask.push_back(-1);
+ else
+ Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+ }
+
+ EVT VT = TLI.getValueType(I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // The mask is longer than the source vectors, and its length is a multiple
+ // of the source vector length. We can concatenate vectors to make the mask
+ // and vector lengths match.
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx);
+ else
+ MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+ // the range of elements the mask accesses in each source vector.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ int Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+ // Check whether the accessed range is smaller than the vector size and
+ // whether we can find a reasonable extract index.
+ int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
+ // Extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that is a multiple of the mask length.
+ if (MaxRange[Input] < (int)MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+ }
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ MappedOps.push_back(Idx);
+ else if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx - StartIdx[0]);
+ else
+ MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+ // We can't use either concat vectors or extract subvectors, so fall back to
+ // replacing the shuffle with per-element extracts and a build vector.
+ EVT EltVT = VT.getVectorElementType();
+ EVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ } else {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < (int)SrcNumElts)
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src1, DAG.getConstant(Idx, PtrVT));
+ else
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src2,
+ DAG.getConstant(Idx - SrcNumElts, PtrVT));
+
+ Ops.push_back(Res);
+ }
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ const Type *AggTy = I.getType();
+ const Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Type *AggTy = Op0->getType();
+ const Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+ // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getIntPtrConstant(Offset));
+ }
+
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ EVT PTy = TLI.getPointerTy();
+ unsigned PtrBits = PTy.getSizeInBits();
+ if (PtrBits < 64)
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
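+ // For example, indexing an array of i32 with the constant index 3 folds to
+ // adding Offs = 4 * 3 = 12 bytes to the pointer.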
+
+ // N = N + Idx * ElementSize;
+ APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+ TD->getTypeAllocSize(Ty));
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getPointerTy()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy();
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If it is greater than the stack
+ // alignment, we note this in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+ // by adding StackAlign-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
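+ // For example, with a 16-byte stack alignment an AllocSize of 40 becomes
+ // (40 + 15) & ~15 == 48.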
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ const Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile() || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
+
+ Values[i] = L;
+ Chains[ChainI] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumValues is zero, because with zero values
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand unless it's a target intrinsic
+ // that lowers to a custom opcode (target memory intrinsics using the generic
+ // INTRINSIC_VOID/INTRINSIC_W_CHAIN opcodes still get the ID).
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+ for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+ assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+ "Intrinsic uses a non-legal type?");
+ }
+#endif // NDEBUG
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+ }
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number in the range [1.0, 2.0):
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
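+///
+/// For example, for 6.0f (bit pattern 0x40C00000) this produces 0x3FC00000,
+/// i.e. 1.5f, since 6.0 == 1.5 * 2^2.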
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
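+///
+/// For example, for 6.0f (bit pattern 0x40C00000) the biased exponent field is
+/// 129, so this returns (float)(129 - 127) == 2.0f.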
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant with the given bit
+/// pattern.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Inlined utility function to implement binary-input
+/// atomic intrinsics for visitIntrinsicCall. I is the call instruction and Op
+/// is the associated ISD node type.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+ ISD::NodeType Op) {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(Op, getCurDebugLoc(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ I.getArgOperand(0));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
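+ // Adding IntegerPartOfX into the bit pattern of 2^FractionalPartOfX below
+ // multiplies the result by 2^IntegerPartOfX. For example, exp(1.5):
+ // 1.5 * LOG2OFe is about 2.164, so we approximate 2^0.164 (about 1.120) and
+ // the +2 in the exponent field scales it to roughly 4.482 == exp(1.5).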
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
+ MVT::i32, t13);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
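+
+// A scalar C model of the limited-precision expansion above (an illustrative
+// sketch only; the helper name is invented and only the 6-bit polynomial is
+// shown):
+//
+//   float fast_expf(float x) {
+//     float t0 = x * 1.4426950f;            // x * log2(e)
+//     int ipart = (int)t0;                  // IntegerPartOfX
+//     float f = t0 - (float)ipart;          // FractionalPartOfX
+//     // 6-bit-accurate polynomial for 2^f:
+//     float p = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;
+//     int bits;
+//     memcpy(&bits, &p, sizeof bits);       // ISD::BITCAST to i32
+//     bits += ipart << 23;                  // add exponent in integer domain
+//     memcpy(&p, &bits, sizeof p);          // ISD::BITCAST back to f32
+//     return p;                             // ~= exp(x)
+//   }
+//
+// visitExp2 and visitPow (base 10.0f) below follow the same scheme, with no
+// initial multiply and with a multiply by log2(10) = 3.3219281f, respectively.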
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
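+
+// A scalar C model of the expansion above (an illustrative sketch only; the
+// helper name is invented and only the 6-bit polynomial is shown). visitLog2
+// and visitLog10 below differ only in the scale applied to the exponent and
+// in the polynomial coefficients:
+//
+//   float fast_logf(float x) {             // assumes x is positive and finite
+//     int bits;
+//     memcpy(&bits, &x, sizeof bits);
+//     float e = (float)((bits >> 23) - 127);         // GetExponent
+//     int mbits = (bits & 0x007fffff) | 0x3f800000;  // GetSignificand:
+//     float m;                                       //   mantissa rebuilt so
+//     memcpy(&m, &mbits, sizeof m);                  //   that m is in [1,2)
+//     // 6-bit-accurate polynomial for ln(m):
+//     float lm = -1.1609546f + (1.4034025f - 0.23903021f * m) * m;
+//     return e * 0.69314718f + lm;                   // e*ln(2) + ln(mantissa)
+//   }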
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog2(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ // Different possible minimax approximations of the significand in
+ // floating point, for various degrees of accuracy over [1,2].
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG2, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog10(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG10, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp2(const CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(0));
+
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP2, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base is 10.0f.
+void
+SelectionDAGBuilder::visitPow(const CallInst &I) {
+ SDValue result;
+ const Value *Val = I.getArgOperand(0);
+ DebugLoc dl = getCurDebugLoc();
+ bool IsExp10 = false;
+
+ if (getValue(Val).getValueType() == MVT::f32 &&
+ getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ APFloat Ten(10.0f);
+ IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+ }
+ }
+ }
+
+ if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getArgOperand(1));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BITCAST, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FPOW, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree;
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // optimizing for size, we only want to do this if the expansion would
+ // produce a small number of multiplies; when not optimizing for size, we
+ // always do the full expansion.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, LHS.getValueType());
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ // If optimizing for size, don't insert too many multiplies. This
+ // inserts up to 5 multiplies.
+ CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
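+
+// A scalar model of the binary decomposition used above (an illustrative
+// sketch; the helper name is invented):
+//
+//   double powi_model(double x, int n) {
+//     unsigned e = (n < 0) ? -(unsigned)n : (unsigned)n;
+//     double res = 1.0, square = x;
+//     while (e) {
+//       if (e & 1) res *= square;      // Res = Res * CurSquare
+//       square *= square;              // CurSquare = CurSquare * CurSquare
+//       e >>= 1;
+//     }
+//     return (n < 0) ? 1.0 / res : res;
+//   }
+//
+// For example, powi(x, 13) with 13 = 0b1101 multiplies in x, x^4 and x^8.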
+
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
+ const SDValue &Ext = N.getOperand(0);
+ if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ else
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, these will be inserted into the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
+ const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ unsigned Reg = 0;
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ // If the byval argument offset is not recorded then ignore this.
+ if (!Offset)
+ Reg = 0;
+ }
+
+ if (N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
+ }
+
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ case Intrinsic::memcpy: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ // Assert that the address space is < 256, since we only support
+ // user-defined address spaces.
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+ < 256 &&
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+ < 256 &&
+ "Unknown address space");
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ MDNode *Variable = DI.getVariable();
+ const Value *Address = DI.getAddress();
+ if (!Address || !DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built, so the absolute
+ // (though not the relative) SDNodeOrder values differ depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return 0;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
+ return 0;
+ }
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return 0;
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ // If the variable is pinned by an alloca in a dominating basic block,
+ // use the StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built, so the absolute
+ // (though not the relative) SDNodeOrder values differ depending on whether
+ // debug info exists.
+ ++SDNodeOrder;
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else if (!V->use_empty() ) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector: {
+ MachineBasicBlock *CallMBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (CallMBB->isLandingPad())
+ AddCatchInfo(I, &MMI, CallMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getArgOperand(0));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ DAG.setRoot(Op.getValue(1));
+ setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalar shift amounts use 32-bit values,
+ // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+ // to zero.
+ // We must do this early because v2i32 is not a legal type.
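+ // Conceptually (an illustrative sketch of the nodes built below, not exact
+ // DAG dumper output):
+ //   ShAmt                                   ; original i32 shift amount
+ //   vec    = BUILD_VECTOR v2i32 ShAmt, 0    ; upper 32 bits forced to zero
+ //   ShAmt' = BITCAST DestVT vec             ; DestVT is the 64-bit MMX type
+ //   Res    = INTRINSIC_WO_CHAIN NewIntrinsic, op0, ShAmt'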
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(I.getType());
+ const Value *Op1 = I.getArgOperand(0);
+ Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ Code);
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return 0;
+ case Intrinsic::sin:
+ setValue(&I, DAG.getNode(ISD::FSIN, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::cos:
+ setValue(&I, DAG.getNode(ISD::FCOS, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::log:
+ visitLog(I);
+ return 0;
+ case Intrinsic::log2:
+ visitLog2(I);
+ return 0;
+ case Intrinsic::log10:
+ visitLog10(I);
+ return 0;
+ case Intrinsic::exp:
+ visitExp(I);
+ return 0;
+ case Intrinsic::exp2:
+ visitExp2(I);
+ return 0;
+ case Intrinsic::pow:
+ visitPow(I);
+ return 0;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+ MVT::i16, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+ MVT::f32, getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getCalledValue());
+ EVT Ty = Arg.getValueType();
+
+ if (CI->isZero())
+ Res = DAG.getConstant(-1ULL, Ty);
+ else
+ Res = DAG.getConstant(0, Ty);
+
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::TRAMPOLINE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+ Ops, 6);
+
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ const Value *Alloca = I.getArgOperand(0);
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return 0;
+ }
+
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, getCurDebugLoc());
+ DAG.setRoot(Result.second);
+ return 0;
+ }
+ case Intrinsic::uadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::UADDO);
+ case Intrinsic::sadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::SADDO);
+ case Intrinsic::usub_with_overflow:
+ return implVisitAluOverflow(I, ISD::USUBO);
+ case Intrinsic::ssub_with_overflow:
+ return implVisitAluOverflow(I, ISD::SSUBO);
+ case Intrinsic::umul_with_overflow:
+ return implVisitAluOverflow(I, ISD::UMULO);
+ case Intrinsic::smul_with_overflow:
+ return implVisitAluOverflow(I, ISD::SMULO);
+
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 5,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
+ return 0;
+ }
+ case Intrinsic::memory_barrier: {
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getArgOperand(x - 1));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
+ case Intrinsic::atomic_cmp_swap: {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ MachinePointerInfo(I.getArgOperand(0)));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+ }
+ case Intrinsic::atomic_load_add:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+ case Intrinsic::atomic_load_sub:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+ case Intrinsic::atomic_load_or:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+ case Intrinsic::atomic_load_xor:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+ case Intrinsic::atomic_load_and:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+ case Intrinsic::atomic_load_nand:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+ case Intrinsic::atomic_load_max:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+ case Intrinsic::atomic_load_min:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+ case Intrinsic::atomic_load_umin:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+ case Intrinsic::atomic_load_umax:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+ case Intrinsic::atomic_swap:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
+ }
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MCSymbol *BeginLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().CreateTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI.LowerCallTo.
+ if (isTailCall &&
+ !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+ isTailCall = false;
+
+ // If there's a possibility that fast-isel has already selected some amount
+ // of the current basic block, don't emit a tail call.
+ if (isTailCall && EnableFastISel)
+ isTailCall = false;
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), RetTy,
+ CS.paramHasAttr(0, Attribute::SExt),
+ CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+ CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+ CS.getCallingConv(),
+ isTailCall,
+ !CS.getInstruction()->use_empty(),
+ Callee, Args, DAG, getCurDebugLoc());
+ assert((isTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+ if (Result.first.getNode()) {
+ setValue(CS.getInstruction(), Result.first);
+ } else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = Outs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+ DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ SmallVector<EVT, 4> RetTys;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
+ RegisterVT, VT, AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ CurReg += NumRegs;
+ }
+
+ setValue(CS.getInstruction(),
+ DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size()));
+ }
+
+  // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to an SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
+ HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
+
+ if (LandingPad) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ const Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ Builder.TD))
+ return Builder.getValue(LoadCst);
+ }
+
+  // Otherwise, we have to emit the load. If the pointer points to memory that
+  // is constant but cannot be folded, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ false /*volatile*/,
+ false /*nontemporal*/, 1 /* align=1 */);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ const Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+      if (!TLI.isTypeLegal(LoadVT) || !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
+ }
+
+
+ return false;
+}
+
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ // See if any floating point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ const FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
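+      // Not a target-specific intrinsic; check whether it is a generic LLVM
+      // intrinsic.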
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ if (!F->hasLocalLinkage() && F->hasName()) {
+ StringRef Name = F->getName();
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name == "memcmp") {
+ if (visitMemCmpCall(I))
+ return;
+ }
+ }
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getCalledValue());
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed checking is
+  // done within LowerCallTo, after more information about the call is known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs,
+ const TargetRegisterInfo &TRI) const {
+ if (isOutReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+ }
+ if (isInReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+ }
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context,
+ const TargetLowering &TLI,
+ const TargetData *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ const llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+
+private:
+ /// MarkRegAndAliases - Mark the specified register and all aliases in the
+ /// specified set.
+ static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+ Regs.insert(Reg);
+ if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+ for (; *Aliases; ++Aliases)
+ Regs.insert(*Aliases);
+ }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end anonymous namespace
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// InputRegs and OutputRegs are the sets of already allocated physical registers.
+///
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = *DAG.getContext();
+
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If there is an input constraint that matches this, we need to reserve
+ // the input register so no other inputs allocate to it.
+ isInReg = OpInfo.hasMatchingInput();
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ EVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is a FP value and we want it in FP registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = EVT::getIntegerVT(Context,
+ OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ EVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
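+  // First pass over the constraints: compute the value type of each operand
+  // and note whether the asm touches memory.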
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ EVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
+ }
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate output reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
+ false,
+ 0,
+ DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+        // Use the produced MatchedRegs object to copy the input value into the
+        // matched virtual registers and record them as INLINEASM node operands.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty())
+ report_fatal_error("Invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second),
+ false, false, 0);
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetData &TD = *TLI.getTargetData();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned NumFixedArgs,
+ CallingConv::ID CallConv, bool isTailCall,
+ bool isReturnValueUsed,
+ SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ // Handle all of the outgoing arguments.
+ SmallVector<ISD::OutputArg, 32> Outs;
+ SmallVector<SDValue, 32> OutVals;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ const Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should come from the frontend (FE); the backend
+        // (BE) will guess if this info is not there, but there are cases it
+        // cannot get right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
+ PartVT, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If it isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < NumFixedArgs);
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0)
+ MyFlags.Flags.setOrigAlign(1);
+
+ Outs.push_back(MyFlags);
+ OutVals.push_back(Parts[j]);
+ }
+ }
+ }
+
+ // Handle the incoming return values from the call.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
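+    // A single EVT may be legalized into several registers, so emit one
+    // InputArg per register part.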
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = isReturnValueUsed;
+ if (RetSExt)
+ MyFlags.Flags.setSExt();
+ if (RetZExt)
+ MyFlags.Flags.setZExt();
+ if (isInreg)
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!isTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((isTailCall || InVals.size() == Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (isTailCall) {
+ DAG.setRoot(Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerCall emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ });
+
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
+ NumRegs, RegisterVT, VT,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+  // For a function returning void, there is no return value. We can't create
+  // such a node, so we just return a null return value in that case; nothing
+  // will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), Chain);
+
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+ return SDValue();
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Arguments used by a switch are excluded, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (EnableFastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
+ // If this is the entry block, emit arguments.
+ const Function &F = *LLVMBB->getParent();
+ SelectionDAG &DAG = SDB->DAG;
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
+
+ // Set up the incoming argument description vector.
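+  // Attribute indices are 1-based; index 0 refers to the function's return value.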
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ TD->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should be passed from the frontend (FE); the
+        // backend (BE) will guess if this info is not there, but there are
+        // cases it cannot get right.
+ unsigned FrameAlign;
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (F.paramHasAttr(Idx, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+ F.isVarArg(), Ins,
+ dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+ RegVT, VT, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+ SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused then remember its value. It is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+ NumParts, PartVT, VT,
+ AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index for byval arguments.
+ if (I->hasByValAttr())
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+ SDB->setValue(I, Res);
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I)) {
+ FuncInfo->InitializeRegForValue(I);
+ SDB->CopyToExportRegsIfNeeded(I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
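+        // ConstantsOut caches one virtual register per constant so that
+        // repeated PHI uses of the same constant share a single copy in this
+        // block.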
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 000000000000..a0884ebf5d56
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,548 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
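+ /// For instance (an illustrative scenario): a dbg_value whose operand has not
+ /// yet been lowered to an SDNode is parked here and resolved later through
+ /// resolveDanglingDebugInfo, once the operand's definition has been seen.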
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and then emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
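+ // size() returns the number of case values covered by [Low, High] as an
+ // APInt; for example (illustrative), Low == 5 and High == 9 yields 5.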
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ const Constant *LT;
+ const Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator()(const Case &C1, const Case &C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+ // CmpLHS/CmpMHS/CmpRHS - The LHS/MHS/RHS of the comparison to emit.
+ // By default, LHS op RHS is emitted. MHS is used for range comparisons:
+ // if MHS is not null, emit (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+ };
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+ Mask(M), ThisBB(T), TargetBB(Tr) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
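+ /// BitTestBlock - Describes the lowering of part of a switch as a sequence of
+ /// bit tests: the header block range-checks the switch value against
+ /// [First, First + Range], and each BitTestCase tests the corresponding bit
+ /// of a per-destination mask. A rough sketch of the emitted control flow
+ /// (illustrative only; the exact nodes are built by visitBitTestHeader and
+ /// visitBitTestCase):
+ ///   Tmp = SValue - First;
+ ///   if (Tmp >u Range) goto Default;
+ ///   if ((1 << Tmp) & Cases[0].Mask) goto Cases[0].TargetBB;
+ ///   ...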
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value* SV,
+ unsigned Rg, EVT RgVT, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ EVT RegVT;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+ AliasAnalysis *AA;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ HasTailCall(false), Context(dag.getContext()) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from clear() so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
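+ /// A typical use, as in SelectionDAGISel::SelectBasicBlock, is
+ ///   CurDAG->setRoot(SDB->getControlRoot());
+ /// right before the lowered DAG for the block is code-generated.
+ ///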
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ /// AssignOrderingToNode - Assign an ordering to the node. The ordering is
+ /// derived from the order in which the code appeared in the source, and is used by the
+ /// scheduler to effectively turn off scheduling.
+ void AssignOrderingToNode(const SDNode *Node);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitUnwind(const UnwindInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitPow(const CallInst &I);
+ void visitExp2(const CallInst &I);
+ void visitExp(const CallInst &I);
+ void visitLog(const CallInst &I);
+ void visitLog2(const CallInst &I);
+ void visitLog10(const CallInst &I);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is a function argument then create a
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, these instructions will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 000000000000..87bb296b8c79
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,2805 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction fails"));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+
+ if (OptLevel == CodeGenOpt::None)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Latency)
+ return createTDListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(0);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(tm)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI whose
+/// incoming value may trap. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
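+/// As an illustrative example (hypothetical IR, not taken from this file):
+/// if %pred has two successors and supplies a PHI in %merge with a constant
+/// expression that can trap, e.g.
+///   %p = phi i32 [ sdiv (i32 1, i32 ptrtoint (i32* @G to i32)), %pred ], ...
+/// then the %pred -> %merge edge is split so that the division is evaluated
+/// only on the path that actually reaches the PHI.
+///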
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ const Function &Fn = *mf.getFunction();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+ CurDAG->init(*MF);
+ FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
+ SDB->init(GFI, *AA);
+
+ SelectAllBasicBlocks(Fn);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ MachineBasicBlock *EntryMBB = MF->begin();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (!MFI->hasCalls()) {
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator
+ II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ goto done;
+ }
+ }
+ }
+ done:;
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J =
+ FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ return true;
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Lower all of the non-terminator instructions. If a call is emitted
+ // as a tail call, cease emitting nodes for this block. Terminators
+ // are handled below.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
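+/// ComputeLiveOutVRegInfo - Walk the lowered DAG from its root along chain
+/// (MVT::Other) operands and, for each CopyToReg into a virtual register whose
+/// source is a scalar integer, record the number of known sign bits and the
+/// known-zero/known-one bits via FuncInfo->AddLiveOutRegInfo.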
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+ CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ } while (!Worklist.empty());
+}
+
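+/// CodeGenAndEmitDAG - Run the per-block DAG pipeline: combine before
+/// legalization, legalize types and vectors (re-running the combiner after any
+/// phase that changed the DAG), legalize operations, combine once more,
+/// instruction-select the nodes, schedule them, and emit the schedule into the
+/// current MachineBasicBlock, notifying the builder via UpdateSplitBlock if
+/// emission split the block.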
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ int BlockNumber = -1;
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
+ BlockName = MF->getFunction()->getNameStr() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getNameStr();
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
+ FuncInfo->InsertPt = Scheduler->InsertPos;
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // The AllNodes list is now topologically sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and proceeding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode)
+ ReplaceUses(Node, ResNode);
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) { // Don't delete EntryToken, etc.
+ ISelUpdater ISU(ISelPosition);
+ CurDAG->RemoveDeadNode(Node, &ISU);
+ }
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
+
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only one we try to work around for the moment.
+ const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
+ const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::const_iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+ }
+}
+
+
+
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
+ FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result; in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(*FuncInfo);
+
+ // Iterate over all basic blocks in the function.
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Set up an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(LLVMBB);
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
+ }
+
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ continue;
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(Inst)) {
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ continue;
+ }
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ SelectBasicBlock(Inst, BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ if (HadTailCall) {
+ --BI;
+ break;
+ }
+
+ continue;
+ }
+
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ }
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+}
+
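+/// FinishBasicBlock - After the main DAG for a block has been emitted, add the
+/// recorded register/block operands to machine PHI nodes in successor blocks
+/// and emit any auxiliary blocks produced by switch lowering (bit-test blocks,
+/// jump tables, and binary-tree case blocks), running CodeGenAndEmitDAG for
+/// each of them.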
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know what the last MBB the LLVM BB expanded is, update
+ // PHI nodes in successors.
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ if (j+1 != ej)
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+ else
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
+
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB.
+ if (PHIBB == SDB->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+ back().ThisBB));
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB)) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(cBB));
+ }
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand
+ (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
+ }
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB)) {
+ PHI->addOperand
+ (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
+ PHI->addOperand(
+ MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
+ Phi != FuncInfo->MBB->end() && Phi->isPHI();
+ ++Phi) {
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::
+ CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
+ false));
+ Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SwitchCases.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it; otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
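+///
+/// For example, if the pattern asks for (and X, 255) but the mask in the DAG
+/// has been simplified to 15, the NeededMask computed below is 240, and the
+/// match still succeeds if MaskedValueIsZero can prove that bits 4-7 of X are
+/// already zero.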
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows bits that are not in the desired mask, this
+  // doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask sets bits that are not in the desired mask, this
+  // doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
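+    // Each operand group begins with a flag word that encodes the operand
+    // kind and how many value operands follow it (see InlineAsm::getFlagWord
+    // and InlineAsm::getNumOperandRegisters).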
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return the user of the MVT::Glue value produced by the
+/// specified SDNode, or null if there is none.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+  // Node IDs are assigned such that a node's ID is guaranteed to be greater
+  // than those of all of its (recursive) operands.  If we scan to a point
+  // where 'Use' has a smaller ID than the node we're scanning for, then we
+  // know we will never find it.
+  //
+  // Use's node ID may be -1 (unassigned) if it is a newly allocated node.
+  // This can happen because we scan down to newly selected nodes in the case
+  // of glue uses.
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+  // Don't revisit a node if we already scanned it and didn't fail; we know we
+  // won't fail if we scan it again.
+ if (!Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
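+  // If N has more than one use, it will have to be emitted for its other
+  // users anyway, so folding it into U here would generally just duplicate
+  // the work.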
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle.  E.g., in the following diagram,
+  // Root can reach N through X.  If N is folded into Root, then X is both a
+  // predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
+
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::Other);
+ VTs.push_back(MVT::Glue);
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+/// GetVBR - Decode a VBR-encoded value whose first byte (passed in as Val)
+/// has the continuation bit (128) set.
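+///
+/// For example, the value 300 is stored as the two bytes 0xAC 0x02: the first
+/// byte holds the low seven bits (44) with the continuation bit set, and the
+/// second byte holds the remaining bits (2) with it clear.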
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
+void SelectionDAGISel::
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ ISelUpdater ISU(ISelPosition);
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputGlue, &ISU);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (FRN->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ if (User->getOpcode() == ISD::CopyToReg ||
+ User->getOpcode() == ISD::CopyFromReg ||
+ User->getOpcode() == ISD::INLINEASM ||
+ User->getOpcode() == ISD::EH_LABEL) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ SDValue Res;
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ // Otherwise, no replacement happened because the node already exists. Replace
+ // Uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current predicate.  If the predicate is known to fail,
+/// set Result=true and return anything.  If the predicate is known to pass,
+/// or if it cannot be evaluated here, set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result, SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+}
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ //case ISD::VALUETYPE:
+ //case ISD::CONDCODE:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+  // MatchScopes - Scopes used when matching; if a match failure happens, this
+  // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
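+  // (In the table, each SwitchOpcode case is laid out as a VBR-encoded case
+  // size, a two-byte node opcode, and then that many bytes of matcher code,
+  // which is what the population loop below assumes.)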
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+        // Otherwise, we know that this case of the Scope is guaranteed to
+        // fail; move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy();
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
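+      // RecNo is 0 for the EmitMergeInputChains1_0 form and 1 for the
+      // EmitMergeInputChains1_1 form.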
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
+ }
+
+ } else {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
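+      // Glue, when present, is always the last result; when both chain and
+      // glue are produced, the chain is second-to-last (matching the VT list
+      // built above).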
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs flag is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkGlueResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the glue-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..cd1647b17b9b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
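+
+// Illustrative usage (not part of the API): in a debug build this can be
+// invoked directly from a debugger, e.g.
+//   (gdb) call CurDAG->viewGraph()
+// assuming 'CurDAG' refers to the SelectionDAG of interest and Graphviz/gv
+// are installed.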
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node (e.g. "color=red").
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node (e.g. "color=red").
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N);
+ i != iend;
+ ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..2626ac3bbb2a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,3370 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+using namespace llvm;
+
+/// We are in the process of implementing a new TypeLegalization action:
+/// the promotion of vector elements. This feature is disabled by default
+/// and can only be enabled with this flag.
+static cl::opt<bool>
+AllowPromoteIntElem("promote-elements", cl::Hidden,
+ cl::desc("Allow promotion of integer vector element types"));
+
+namespace llvm {
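+// A rough illustration of the mapping below (not exhaustive): in a PIC
+// compilation, a thread-local global with default visibility that may be
+// defined in another module gets the GeneralDynamic model, while a local or
+// hidden one gets LocalDynamic; without PIC, a definition in the current
+// module (or a hidden declaration) gets LocalExec and any other declaration
+// gets InitialExec.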
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (reloc == Reloc::PIC_) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+}
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
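+/// Each soft-float comparison libcall returns an integer; the condition code
+/// recorded here describes how that integer relates to zero when the original
+/// comparison holds.  For example (illustrative, following the usual libgcc
+/// convention), OEQ_F32 maps to SETEQ because __eqsf2(a, b) returns zero
+/// exactly when neither argument is NaN and a == b.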
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
+ mayPromoteElements(AllowPromoteIntElem) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
+ benefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::Latency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLowering::~TargetLowering() {
+ delete &TLOF;
+}
+
+MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize());
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
+
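+// Illustrative examples of the breakdown below (for a hypothetical target):
+// if v4i32 is legal but v8i32 is not, v8i32 breaks into NumIntermediates = 2
+// intermediate v4i32 values; a non-power-of-two type such as v3i32, on a
+// target with no legal vector types, becomes 3 scalar i32 values.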
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ EVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// hasLegalSuperRegRegClasses - Return true if the specified register class
+/// has one or more super-reg register classes that are legal.
+bool
+TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
+ if (*RC->superregclasses_begin() == 0)
+ return false;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (isLegalRC(RRC))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+ const TargetRegisterClass *BestRC = RC;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (RRC->isASubClass() || !isLegalRC(RRC))
+ continue;
+ if (!hasLegalSuperRegRegClasses(RRC))
+ return std::make_pair(RRC, 1);
+ BestRC = RRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
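+///
+/// As a sketch of the effect, for a hypothetical 32-bit target whose largest
+/// integer register is i32 and which lacks hardware floating point: i64 is
+/// marked TypeExpandInteger and needs two registers, i16/i8/i1 are promoted
+/// to i32, f64 is softened to i64, and f32 is softened to i32.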
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+ EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!ExpandedVT.isInteger())
+ break;
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
+ }
+
+  // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ EVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+      // If the promotion of vector elements is allowed (via the flag above),
+      // first try to promote the elements of integer vectors, marking them
+      // TypePromoteInteger. If no legal promotion is found, fall back to the
+      // widen-vector method.
+ if (mayPromoteElements)
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+      // The element count is already a power of 2. The default action is
+      // to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest legal register class
+  // (meaning one which is not a sub-register class) for a group of value
+  // types. For example, on i386 the representative class for i8, i16, and
+  // i32 is GR32; on x86_64 it is GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+
+MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+ return PointerTy.SimpleTy;
+}
+
+MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ EVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ EVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
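+///
+/// Illustrative example (assuming a 32-bit target): a function returning i64
+/// with the signext attribute yields two i32 output parts, both carrying the
+/// SExt flag, at offsets 0 and 4 when Offsets is supplied.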
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+ unsigned Offset = 0;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+ PartVT.getTypeForEVT(ReturnType->getContext()));
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
+ if (Offsets) {
+ Offsets->push_back(Offset);
+ Offset += PartSize;
+ }
+ }
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
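+///
+/// Illustrative example: for (or x, 0xFF) where only the low four bits are
+/// demanded, the constant is shrunk to 0xF, since the remaining set bits can
+/// only affect bits of the result that are never used.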
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+    // If the constant has bits set outside the demanded region, shrink it.
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
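+///
+/// Illustrative example (assuming a target where i16 <-> i32 casts are free):
+/// if only the low 16 bits of an i32 add are demanded, the add is rewritten
+/// as zext(trunc(x) + trunc(y)) performed in i16.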
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller). The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+ // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+ KnownZero = ~KnownOne & NewMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+ LHSZero, LHSOne, Depth);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS or RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // For XOR, we prefer to force bits to 1 if they will make a -1.
+ // If we can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ if ((APInt::getHighBitsSet(BitWidth,
+ BitWidth - InnerVT.getSizeInBits()) &
+ DemandedMask) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getScalarType().getSizeInBits());
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if ((NewBits & NewMask) == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ EVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // Demand all the bits of the input that are demanded in the output.
+ // The low bits are obvious; the high bits are demanded because we're
+ // asserting that they're zero here.
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
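+
+ // Editorial note (not part of the upstream change), two concrete folds the
+ // routine above performs: if only bit 0 of (sra x, s) is demanded, the node
+ // is rewritten as (srl x, s), since the sign bit cannot reach bit 0 for any
+ // defined shift amount; and once every demanded bit lands in KnownZero or
+ // KnownOne, the whole expression is replaced by the constant KnownOne.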
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
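+
+ // Editorial example (not upstream code): (shl 1, x) and, for i32,
+ // (srl 0x80000000, x) both satisfy this predicate without reaching
+ // ComputeMaskedBits. A value like (and y, 8) does not: bit 3 is only
+ // possibly set, so KnownOne stays empty and the fallback returns false,
+ // which matters for the x&y == y fold further below.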
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+ // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ false, false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT)==Legal))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(C1.trunc(InSize), newVT),
+ Cond);
+ break;
+ }
+ default:
+ break; // TODO: be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents() == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ } else if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger())
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
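+
+ // Editorial note (not part of the upstream change): typical rewrites made
+ // above include range canonicalizations such as (setult X, 1) ->
+ // (seteq X, 0) and algebraic ones such as (seteq (add X, 10), 12) ->
+ // (seteq X, 2), the latter only when the add has no other users.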
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
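+
+ // Worked example (editorial, using a hypothetical global @g): for the node
+ // (add (GlobalAddress @g, 8), (Constant 16)) and an incoming Offset of 0,
+ // this returns true with GA = @g and Offset accumulated to 24, i.e. 8 from
+ // the address node plus 16 from the constant operand.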
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
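+
+ // Editorial example (not upstream code): under this default classification
+ // "r" maps to C_RegisterClass, "m" to C_Memory, "i" to C_Other, and a
+ // brace-enclosed name such as "{eax}" to C_Register; anything else falls
+ // back to C_Unknown.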
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? C->getDebugLoc() : DebugLoc(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend the value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
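+
+ // Editorial note (not part of the upstream change): for the constraint 'i'
+ // with an operand of the form (add GV, 4), the code above pulls the pair
+ // apart and emits a TargetGlobalAddress whose offset is GV's own offset
+ // plus 4. Under 'n', global addresses are rejected and a plain integer is
+ // pushed as a sign-extended i64 target constant.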
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I)))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
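+///
+/// For example (illustrative), for a call such as
+/// call i32 asm "add $0,$1,$2", "=r,r,r"(i32 %a, i32 %b)
+/// this returns three AsmOperandInfos: an isOutput operand whose ConstraintVT
+/// comes from the call's result type, and two isInput operands taken from the
+/// call arguments.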
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (isa<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, 0 = so-so match, ..., 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag this alternative as one that cannot match.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: llvm_unreachable("Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float, if in the host's format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then using the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
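+/// For illustration, with the multi-letter constraint "imr": a constant
+/// operand such as 42 is valid for 'i', so rule (1) selects 'i'; for a
+/// non-constant operand rule (2) selects 'm', since memory is more general
+/// than a register class.
+///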
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
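+///
+/// For example, under the default implementation below "r", "r+i" (with a
+/// small offset), "r+r" and "2*r" are accepted, while "GV+r", "r+r+i" and
+/// "2*r+i" are rejected.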
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation accepts only a conservative, RISC-like set of
+ // r+r and r+i addressing modes.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r and r+i forms (2*r is allowed as r+r).
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
+
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
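+///
+/// Worked example (i8, illustrative): the multiplicative inverse of 3 modulo
+/// 2^8 is 171, since 3 * 171 = 513 = 2 * 256 + 1. An exact sdiv by 3 thus
+/// becomes a mul by 171, e.g. 15 * 171 = 2565 = 5 (mod 256). For an even
+/// divisor such as 6, the value is first shifted right by one and then
+/// multiplied by the inverse of 3.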
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
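+///
+/// For example (i32, illustrative): sdiv by 5 uses the magic value 0x66666667
+/// with s = 1, i.e.
+/// Q = sra(mulhs(N0, 0x66666667), 1); result = add(Q, srl(Q, 31))
+/// which yields 1 for N0 = 7 and -1 for N0 = -7.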
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
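+///
+/// For example (i32, illustrative): udiv by 7 uses the magic value 0x24924925
+/// with s = 3 and the "add" fixup, i.e.
+/// Q = mulhu(N0, 0x24924925); NPQ = srl(sub(N0, Q), 1);
+/// result = srl(add(NPQ, Q), 2)
+/// which yields 14 for N0 = 100.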
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the dividend upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a081e3cd493f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getTargetData()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 000000000000..5a253a4d97e4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,441 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// A prototype runtime compatible with ShadowStackGC can be found in
+// runtime/GC/SemiSpace.cpp.
+//
+// In order to support this particular transformation, all stack roots are
+// allocated together in a single frame object on the stack. This allows a
+// fully target-independent stack map while introducing only minor runtime
+// overhead.
+//
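+// As an illustration (a simplified sketch, not the exact IR), a function with
+// a single gcroot is lowered roughly as:
+//
+// struct { StackEntry Hdr; void *Root0; } gc_frame;
+// gc_frame.Hdr.Next = llvm_gc_root_chain;
+// gc_frame.Hdr.Map = &__gc_<function>; // constant FrameMap for this frame
+// llvm_gc_root_chain = &gc_frame.Hdr;
+// ... function body, with the root living in gc_frame.Root0 ...
+// llvm_gc_root_chain = gc_frame.Hdr.Next; // restored on every return/unwind
+//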
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// Head - The global variable ("llvm_gc_root_chain") that heads the linked
+ /// list of shadow stack entries containing the GC roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
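+ ///
+ /// Typical use, as in ShadowStackGC::performCustomLowering below:
+ /// EscapeEnumerator EE(F, "cleanup");
+ /// while (IRBuilder<> *AtExit = EE.Next())
+ /// ... emit the "finally"-style code at AtExit's insertion point ...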
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
+ CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args, CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // initializeCustomLowering creates the abstract type of this value.
+ const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
+ };
+
+ Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+ StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys);
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // initializeCustomLowering creates the generic version of this type.
+ std::vector<Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys);
+}
+
+/// initializeCustomLowering - Create the FrameMap and StackEntry types used by
+/// the shadow stack and find or create the global root chain.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::createNamed("gc_map", EltTys);
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 000000000000..160f38f69236
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop, its spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping data flow sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getNameStr();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save/restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ dbgs() << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn != AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut != AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets of anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG({
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs()
+ << "-----------------------------------------------------------\n"
+ << " Antic/Avail Sets:\n"
+ << "-----------------------------------------------------------\n"
+ << "iterations = " << iterations << "\n"
+ << "-----------------------------------------------------------\n"
+ << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+ << "-----------------------------------------------------------\n";
+
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+
+ dbgs()
+ << "-----------------------------------------------------------\n";
+ }
+ });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ dbgs() << "ENABLED: " << Fn.getFunction()->getName();
+ if (HasFastExitPath)
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ dbgs() << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
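+/// Informally, the equation below places a spill of CSR r in MBB when r is
+/// anticipated at MBB (needed on every path leaving MBB), is not already
+/// available (not saved on every path reaching MBB), and is not anticipated
+/// at any predecessor of MBB -- i.e. MBB is the earliest block at which the
+/// spill is certain to be needed.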
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry regions.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
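+/// Informally, and symmetrically to calcSpillPlacements, the equation below
+/// places a restore of CSR r in MBB when r is available at the end of MBB
+/// (saved on every path reaching it), is no longer anticipated (not needed
+/// on any path leaving MBB), and is not available at the exit of any
+/// successor -- i.e. MBB is the latest block at which the restore can be
+/// placed.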
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry regions.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
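+ // When only the placements change (SRChanged), the affected blocks are
+ // re-examined for multi-entry/exit coverage below; when the use sets
+ // themselves grow (changed), the Antic/Avail sets are recomputed and the
+ // placements are rebuilt from scratch on the next pass.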
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ dbgs() << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
+ << Fn.getFunction()->getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ dbgs() << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ dbgs() << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block to a return block,
+/// either directly or through a chain of blocks that do not branch.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n");
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n");
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n");
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ if (!MBB)
+ return "";
+
+ if (MBB->getBasicBlock())
+ return MBB->getBasicBlock()->getNameStr();
+
+ std::ostringstream name;
+ name << "_MBB_" << MBB->getNumber();
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ DEBUG({
+ if (MBB)
+ dbgs() << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ });
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ DEBUG({
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (!CSRSave[MBB].empty()) {
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ dbgs() << '\n';
+ }
+
+ if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
+ dbgs() << " "
+ << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ });
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..65a33da93afe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,588 @@
+//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPass : public FunctionPass {
+
+ const TargetLowering *TLI;
+
+ const Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *SelectorFn;
+ Constant *ExceptionFn;
+ Constant *CallSiteFn;
+ Constant *DispatchSetupFn;
+
+ Value *CallSite;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ : FunctionPass(ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+ void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
+ SwitchInst *CatchSwitch);
+ void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ bool insertSjLjEHSupport(Function &F);
+ };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public Interface To the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+ return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
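+ // In C terms the layout built below is roughly (the field names are
+ // illustrative only, they are not part of the declared type):
+ //
+ //   struct SjLjFunctionContext {
+ //     void    *prev;          // __prev: chain of registered contexts
+ //     int32_t  call_site;     // call_site: index of the active call site
+ //     int32_t  data[4];       // __data: exception pointer / selector
+ //     void    *personality;   // __personality: personality routine
+ //     void    *lsda;          // __lsda: language-specific data area
+ //     void    *jbuf[5];       // __jbuf: builtin_setjmp buffer
+ //   };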
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+ ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ DispatchSetupFn
+ = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
+ PersonalityFn = 0;
+
+ return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+ Value *CallSite) {
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ // Insert a store of the call-site number
+ new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
+}
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // The runtime comes back to the dispatcher with the call_site - 1 in
+ // the context. Odd, but there it is.
+ ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo - 1);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert the store of the call site value
+ insertCallSiteStore(II, InvokeNo, CallSite);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ // We still want this to look like an invoke so we emit the LSDA properly,
+ // so we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge
+/// we spill into a stack location, guaranteeing that there is nothing live
+/// across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
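+/// The approach below: (1) split the critical edges out of each invoke,
+/// (2) copy each incoming argument through a no-op instruction in the entry
+/// block so that argument values are not live across it, and (3) demote any
+/// remaining value that is live into an unwind destination to a stack slot
+/// with DemoteRegToStack.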
+void SjLjEHPass::
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the argument.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the argument.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ // Look through the terminators of the basic blocks to find invokes, returns
+ // and unwinds.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+ }
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // If we don't have any invokes, there's nothing to do.
+ if (Invokes.empty()) return false;
+
+ // Find the eh.selector.*, eh.exception and alloca calls.
+ //
+ // Remember any allocas() that aren't in the entry block, as the
+ // jmpbuf saved SP will need to be updated for them.
+ //
+ // We'll use the first eh.selector to determine the right personality
+ // function to use. For SJLJ, we always use the same personality for the
+ // whole function, not on a per-selector basis.
+ // FIXME: That's a bit ugly. Better way?
+ SmallVector<CallInst*,16> EH_Selectors;
+ SmallVector<CallInst*,16> EH_Exceptions;
+ SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() == SelectorFn) {
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
+ EH_Selectors.push_back(CI);
+ } else if (CI->getCalledFunction() == ExceptionFn) {
+ EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
+ }
+ }
+ }
+
+ // If we don't have any eh.selector calls, we can't determine the personality
+ // function. Without a personality function, we can't process exceptions.
+ if (!PersonalityFn) return false;
+
+ // We have invokes, so we need to add register/unregister calls to get this
+ // function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge we
+ // spill into a stack location, guaranteeing that there is nothing live across
+ // the unwind edge. This process also splits all critical edges coming out of
+// invokes.
+ splitLiveRangesAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a reference to
+ // the selector value returned in the function context. We leave the selector
+ // itself so the EH analysis later can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
+
+ // eh.exception calls are replaced with references to the proper location in
+ // the context. Unlike eh.selector, the eh.exception calls are removed
+ // entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure the
+ // instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
+
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context and
+ // continue.
+
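+ // After the transformation the start of the function looks roughly like:
+ //
+ //   EntryBB: ... eh.sjlj.setjmp(jbuf) ...
+ //      |  zero                 |  non-zero
+ //      v                       v
+ //   eh.sjlj.setjmp.cont     eh.sjlj.setjmp.catch
+ //   (register the context,  (switch on call_site; cases branch to the
+ //    continue normally)      invoke landing pads, default traps)
+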
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Insert a load of the callsite in the dispatch block, and a switch on its
+ // value. By default, we issue a trap statement.
+ BasicBlock *TrapBlock =
+ BasicBlock::Create(F.getContext(), "trapbb", &F);
+ CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
+ "", TrapBlock);
+ new UnreachableInst(F.getContext(), TrapBlock);
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+
+ // Add a call to dispatch_setup after the setjmp call. This is expanded to any
+ // target-specific setup that needs to be done.
+ CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+
+ // Check the return value of the setjmp; a non-zero value goes to the dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values, and fill in the dispatch switch accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block: before the function context is registered,
+ // any unexpected exception thrown there goes directly to the caller's
+ // context, which is what we want anyway, so there is nothing to do here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ }
+ }
+
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(TrapBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
+ return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+ bool Res = insertSjLjEHSupport(F);
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 000000000000..ca79cafcf4be
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,179 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator.
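+ // The resulting numbering is one sentinel entry at index 0, one entry per
+ // instruction spaced SlotIndex::InstrDist apart, and one blank
+ // (null-instruction) entry between consecutive blocks; the gaps leave room
+ // for later insertions, with renumberIndexes() catching up locally when an
+ // inserted instruction cannot get a fresh index.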
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
+
+ mf = &fn;
+ initList();
+
+ // Check that the list contains only the sentinel.
+ assert(indexListHead->getNext() == 0 &&
+ "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ functionSize = 0;
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ push_back(createEntry(0, index));
+
+ // Iterate over the function.
+ for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ MachineBasicBlock *mbb = &*mbbItr;
+
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+
+ for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ MachineInstr *mi = miItr;
+ if (mi->isDebugValue())
+ continue;
+
+ // Insert a store index for the instr.
+ push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+
+ ++functionSize;
+ }
+
+ // We insert one blank instruction between basic blocks.
+ push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(dump());
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+ // Renumber updates the index of every element of the index list.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
+
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
+ curEntry->setIndex(index);
+ index += SlotIndex::InstrDist;
+ }
+}
+
+// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+ IndexListEntry *start = curEntry->getPrev();
+ unsigned index = start->getIndex();
+ IndexListEntry *tail = getTail();
+ do {
+ curEntry->setIndex(index += Space);
+ curEntry = curEntry->getNext();
+ // If the next index is bigger, we have caught up.
+ } while (curEntry != tail && curEntry->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
+
+
+void SlotIndexes::dump() const {
+ for (const IndexListEntry *itr = front(); itr != getTail();
+ itr = itr->getNext()) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr() != 0) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << entry().getIndex() << "LudS"[getSlot()];
+ else
+ os << "invalid";
+}
+
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..694961863261
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,352 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
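+// Here V_n is the output value of node n (in [-1;1], positive meaning the
+// variable should stay in a register across bundle n), B_n is the bias
+// accumulated from the block constraints, and F_b is the normalized
+// frequency of the transparent block b linking nodes n and m.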
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+ /// block frequency estimates means that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Scale[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+ /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Scale[0] = Scale[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ /// the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
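+ /// For example (made-up numbers): with Bias = 0.5 and a single link of
+ /// weight 0.25 to a node whose Value is -1, the weighted sum is 0.25,
+ /// which is above Thres, so Value becomes 1 and this node prefers a
+ /// register; a sum inside [-Thres;Thres] leaves the node undecided
+ /// (Value = 0), which preferReg() treats as "stay on the stack".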
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequency.resize(mf.getNumBlockIDs());
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = LiveIntervals::getSpillWeight(true, false,
+ loops->getLoopDepth(I));
+ unsigned Num = I->getNumber();
+ BlockFrequency[Num] = Freq;
+ nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+ }
+
+ // Scales are reciprocal frequencies.
+ for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+ for (unsigned d = 0; d != 2; ++d)
+ if (nodes[i].Scale[d] > 0)
+ nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+}
+
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in ActiveNodes for each node that becomes active.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I->Number);
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+ Linked.push_back(ib);
+ if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+ Linked.push_back(ob);
+ float Freq = getBlockFrequency(Number);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ Linked.clear();
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (!nodes[n].Links.empty())
+ Linked.push_back(n);
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes in Linked until stability or
+/// the maximum number of iterations is reached.
+void SpillPlacement::iterate() {
+ // First update the recently positive nodes. They have likely received new
+ // negative bias that will turn them off.
+ while (!RecentPositive.empty())
+ nodes[RecentPositive.pop_back_val()].update(nodes);
+
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ Linked.clear();
+ RecentPositive.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = 0;
+ return Perfect;
+}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..6952ad800965
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,142 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<float, 4> BlockFrequency;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or until new
+  /// positive bundles are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+};
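+
+// A typical client drives the analysis roughly as follows (a minimal sketch;
+// SP is assumed to be the SpillPlacement analysis, e.g. obtained through
+// getAnalysis<SpillPlacement>(), and all block numbers and constraints below
+// are purely illustrative):
+//
+//   BitVector RegBundles;
+//   SP.prepare(RegBundles);
+//
+//   SpillPlacement::BlockConstraint BC[1];
+//   BC[0].Number = 3;                         // Hypothetical MBB number.
+//   BC[0].Entry = SpillPlacement::PrefReg;    // Live-in, prefer a register.
+//   BC[0].Exit = SpillPlacement::MustSpill;   // Live-out, must be on stack.
+//   SP.addConstraints(BC);
+//
+//   SP.addLinks(ThroughBlocks);  // Hypothetical ArrayRef<unsigned> of blocks
+//                                // the variable merely passes through.
+//
+//   if (SP.scanActiveBundles())
+//     SP.iterate();
+//   bool Perfect = SP.finish();
+//   // A set bit in RegBundles now means: keep the variable in a register
+//   // through that edge bundle.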
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
new file mode 100644
index 000000000000..b6bbcd7176dd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,241 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ enum SpillerName { trivial, standard, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+ cl::desc("Spiller to use: (default: standard)"),
+ cl::Prefix,
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumVal(standard, "default spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
+ clEnumValEnd),
+ cl::init(standard));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+ MachineFunctionPass *pass;
+ MachineFunction *mf;
+ VirtRegMap *vrm;
+ LiveIntervals *lis;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
+ {
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
+ }
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding or
+ /// remat is attempted.
+ void trivialSpillEverywhere(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &newIntervals) {
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+ "Trying to spill a stack slot.");
+
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ // Iterate over reg uses/defs.
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ // Grab the use/def instr.
+ MachineInstr *mi = &*regItr;
+
+ DEBUG(dbgs() << " Processing " << *mi);
+
+ // Step regItr to the next use/def instr.
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ // Collect uses & defs for this instr.
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+ indices.push_back(i);
+ }
+
+ // Create a new vreg & interval for this instr.
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ vrm->assignVirt2StackSlot(newVReg, ss);
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ newLI->weight = HUGE_VALF;
+
+ // Update the reg operands & kill flags.
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ unsigned mopIdx = indices[i];
+ MachineOperand &mop = mi->getOperand(mopIdx);
+ mop.setReg(newVReg);
+ if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+ mop.setIsKill(true);
+ }
+ }
+ assert(hasUse || hasDef);
+
+ // Insert reload if necessary.
+ MachineBasicBlock::iterator miItr(mi);
+ if (hasUse) {
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tri);
+ MachineInstr *loadInstr(prior(miItr));
+ SlotIndex loadIndex =
+ lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, loadInstr);
+ SlotIndex endIndex = loadIndex.getNextIndex();
+ VNInfo *loadVNI =
+ newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ }
+
+ // Insert store if necessary.
+ if (hasDef) {
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ true, ss, trc, tri);
+ MachineInstr *storeInstr(llvm::next(miItr));
+ SlotIndex storeIndex =
+ lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, storeInstr);
+ SlotIndex beginIndex = storeIndex.getPrevIndex();
+ VNInfo *storeVNI =
+ newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+ newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ }
+
+ newIntervals.push_back(newLI);
+ }
+ }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
+
+ void spill(LiveRangeEdit &LRE) {
+ // Ignore spillIs - we don't use it.
+ trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
+ }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Falls back on LiveIntervals::addIntervalsForSpills.
+class StandardSpiller : public Spiller {
+protected:
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ MachineLoopInfo *loopInfo;
+ VirtRegMap *vrm;
+public:
+ StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : mf(&mf),
+ lis(&pass.getAnalysis<LiveIntervals>()),
+ lss(&pass.getAnalysis<LiveStacks>()),
+ loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
+ vrm(&vrm) {}
+
+ /// Falls back on LiveIntervals::addIntervalsForSpills.
+ void spill(LiveRangeEdit &LRE) {
+ std::vector<LiveInterval*> added =
+ lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
+ loopInfo, *vrm);
+ LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
+ added.begin(), added.end());
+
+ // Update LiveStacks.
+ int SS = vrm->getStackSlot(LRE.getReg());
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+ const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
+ LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+ if (!SI.hasAtLeastOneValue())
+ SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+ SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
+ }
+};
+
+} // end anonymous namespace
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ switch (spillerOpt) {
+ default: assert(0 && "unknown spiller");
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case standard: return new StandardSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
new file mode 100644
index 000000000000..41f1727da439
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -0,0 +1,46 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+ class LiveRangeEdit;
+ class MachineFunction;
+ class MachineFunctionPass;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
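+
+  // A register allocator typically owns a single spiller for the whole
+  // function. A minimal sketch (pass, mf, vrm and the LiveRangeEdit LRE are
+  // assumed to be provided by the caller):
+  //
+  //   Spiller *TheSpiller = createSpiller(pass, mf, vrm);
+  //   ...
+  //   TheSpiller->spill(LRE);   // Spill the LRE.getParent() live interval.
+  //   ...
+  //   delete TheSpiller;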
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..761cab7ce850
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1386 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "SplitKit.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = 0;
+ DidRepairRange = false;
+}
+
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = LIS.getMBBEndIdx(MBB);
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+    // There may not be a call instruction, in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
+ if (I->getDesc().isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.second;
+ else
+ return LSP.first;
+}
+
+/// analyzeUses - Count instructions and basic blocks using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+  // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+ ++I)
+ if (!I.getOperand().isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
+ }
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the UseBlocks and ThroughBlocks arrays with
+/// information about the blocks where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+      // With no uses, the range shouldn't end mid-block. If it does, the live
+      // interval is inconsistent and the caller will repair it.
+ if (LVI->end < Stop)
+ return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstUse = *UseI;
+ assert(BI.FirstUse >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastUse = UseI[-1];
+ assert(BI.LastUse < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastUse = LastStop;
+ break;
+ }
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveThrough = false;
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastUse = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstUse = LVI->start;
+ }
+ }
+
+ // Don't set LiveThrough when the block has a gap.
+ BI.LiveThrough = BI.LiveIn && BI.LiveOut;
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+ return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
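+// A minimal sketch of how the analysis is queried (vrm, lis, and mli are
+// assumed to be the current function's VirtRegMap, LiveIntervals, and
+// MachineLoopInfo; LI is the live interval being considered for splitting):
+//
+//   SplitAnalysis SA(vrm, lis, mli);
+//   SA.analyze(&LI);
+//   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+//   for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i)
+//     dbgs() << "BB#" << UseBlocks[i].MBB->getNumber()
+//            << " first use " << UseBlocks[i].FirstUse << '\n';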
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(0),
+ OpenIdx(0),
+ RegAssign(Allocator)
+{}
+
+void SplitEditor::reset(LiveRangeEdit &lre) {
+ Edit = &lre;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // We don't need to clear LiveOutCache, only LiveOutSeen entries are read.
+ LiveOutSeen.clear();
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(LIS, TII, 0);
+}
+
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second) {
+ SlotIndex Def = OldVNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ // No longer a simple mapping.
+ InsP.first->second = 0;
+ }
+
+  // This is a complex mapping; add liveness for VNI.
+ SlotIndex Def = VNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+
+ return VNI;
+}
+
+void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)];
+
+  // ParentVNI was either unmapped or already complex mapped. Either way,
+  // there is nothing more to do.
+ if (!VNI)
+ return;
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ VNI = 0;
+}
+
+// extendRange - Extend the live range to reach Idx.
+// Potentially create phi-def values.
+void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) {
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
+ assert(IdxMBB && "No MBB at Idx");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx))
+ return;
+
+ // Now for the fun part. We know that ParentVNI potentially has multiple defs,
+ // and we may need to create even more phi-defs to preserve VNInfo SSA form.
+ // Perform a search for all predecessor blocks where we know the dominating
+ // VNInfo.
+ VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot());
+
+ // When there were multiple different values, we may need new PHIs.
+ if (!VNI)
+ return updateSSA();
+
+ // Poor man's SSA update for the single-value case.
+ LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]);
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ if (I->Kill.isValid())
+ LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ LiveOutCache[MBB] = LOP;
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ }
+ }
+}
+
+/// findReachingDefs - Search the CFG for known live-out values.
+/// Add required live-in blocks to LiveInBlocks.
+VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill) {
+ // Initialize the live-out cache the first time it is needed.
+ if (LiveOutSeen.empty()) {
+ unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+ LiveOutSeen.resize(N);
+ LiveOutCache.resize(N);
+ }
+
+ // Blocks where LI should be live-in.
+ SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = WorkList[i];
+ assert(!MBB->pred_empty() && "Value live-in to entry block?");
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ LiveOutPair &LOP = LiveOutCache[Pred];
+
+ // Is this a known live-out block?
+ if (LiveOutSeen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LOP.first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ // First time. LOP is garbage and must be cleared below.
+ LiveOutSeen.set(Pred->getNumber());
+
+ // Does Pred provide a live-out value?
+ SlotIndex Start, Last;
+ tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred);
+ Last = Last.getPrevSlot();
+ VNInfo *VNI = LI->extendInBlock(Start, Last);
+ LOP.first = VNI;
+ if (VNI) {
+ LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)];
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+ LOP.second = 0;
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred);
+ else
+ // Loopback to KillMBB, so value is really live through.
+ Kill = SlotIndex();
+ }
+ }
+
+ // Transfer WorkList to LiveInBlocks in reverse order.
+ // This ordering works best with updateSSA().
+ LiveInBlocks.clear();
+ LiveInBlocks.reserve(WorkList.size());
+ while(!WorkList.empty())
+ LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]);
+
+ // The kill block may not be live-through.
+ assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB);
+ LiveInBlocks.back().Kill = Kill;
+
+ return UniqueVNI ? TheVNI : 0;
+}
+
+void SplitEditor::updateSSA() {
+ // This is essentially the same iterative algorithm that SSAUpdater uses,
+ // except we already have a dominator tree, so we don't have to recompute it.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOutCache[IDom->getBlock()];
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair Value = LiveOutCache[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (MDT.dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extendRange. In that case LiveOutSeen is true, and
+ // LiveOutCache indicates a foreign or missing value.
+ LiveOutPair &LOP = LiveOutCache[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ unsigned RegIdx = RegAssign.lookup(Start);
+ LiveInterval *LI = Edit->get(RegIdx);
+ VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ I->Value = VNI;
+ // This block is done, we know the final value.
+ I->DomNode = 0;
+ if (I->Kill.isValid())
+ LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+ if (I->Kill.isValid())
+ continue;
+ // Propagate IDomValue if needed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.second != Node && LOP.first != IDomValue.first) {
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ }
+ } while (Changes);
+
+ // The values in LiveInBlocks are now accurate. No more phi-defs are needed
+ // for these blocks, so we can color the live ranges.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+ E = LiveInBlocks.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ assert(I->Value && "No live-in value found");
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ unsigned RegIdx = RegAssign.lookup(Start);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LI->addRange(LiveRange(Start, I->Kill.isValid() ?
+ I->Kill : LIS.getMBBEndIdx(MBB), I->Value));
+ }
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+ .getDefIndex();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
+ VNI->setCopy(CopyMI);
+ return VNI;
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->create(LIS, VRM);
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->create(LIS, VRM);
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by extendRange().
+ if (ParentVNI)
+ markComplexMapped(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need extendRange().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ LiveInBlocks.clear();
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+ ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+ DEBUG(dbgs() << " blit " << *ParentI << ':');
+ VNInfo *ParentVNI = ParentI->valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = ParentI->start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = ParentI->end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Check for a simply defined value that can be blitted directly.
+ if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LI->addRange(LiveRange(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip rematerialized values, we need to use extendRange() and
+ // extendPHIKillRanges() to completely recompute the live ranges.
+ if (Edit->didRematerialize(ParentVNI)) {
+ DEBUG(dbgs() << "(remat)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ // Initialize the live-out cache the first time it is needed.
+ if (LiveOutSeen.empty()) {
+ unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+ LiveOutSeen.resize(N);
+ LiveOutCache.resize(N);
+ }
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex BlockStart, BlockEnd;
+ tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI->extendInBlock(BlockStart,
+ std::min(BlockEnd, End).getPrevSlot());
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End) {
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+ }
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI->extendInBlock(BlockStart,
+ std::min(BlockEnd, End).getPrevSlot());
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd) {
+ // Live-out as well.
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+ }
+ } else {
+ // This block needs a live-in value.
+ LiveInBlocks.push_back(MDT[MBB]);
+ // The last block covered may not be live-out.
+ if (End < BlockEnd)
+ LiveInBlocks.back().Kill = End;
+ else {
+ // Live-out, but we need updateSSA to tell us the value.
+ LiveOutSeen.set(MBB->getNumber());
+ LiveOutCache[MBB] = LiveOutPair((VNInfo*)0,
+ (MachineDomTreeNode*)0);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != ParentI->end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ if (!LiveInBlocks.empty())
+ updateSSA();
+
+ return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(End)) {
+ assert(RegAssign.lookup(End) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ extendRange(RegIdx, End);
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so just assign them to
+ // the complement.
+ if (MO.isUse() && MO.isUndef()) {
+ MO.setReg(Edit->get(0)->reg);
+ continue;
+ }
+
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (MO.isDef())
+ Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ MO.setReg(Edit->get(RegIdx)->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges)
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+      // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getUseIndex();
+
+ extendRange(RegIdx, Idx);
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the store slot.
+ if (LII->end != LII->valno->def.getNextSlot())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
+ VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ VNI->setCopy(ParentVNI->getCopy());
+
+ // Mark rematted values as complex everywhere to force liveness computation.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ markComplexMapped(i, ParentVNI);
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit->get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned j = 1; j != NumComp; ++j)
+ dups.push_back(&Edit->create(LIS, VRM));
+ ConEQ.Distribute(&dups[0], MRI);
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getMultiUseBlocks - If CurLI has more than one use in a basic block, it
+/// may be advantageous to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+ // If CurLI is local to one block, there is no point to splitting it.
+ if (UseBlocks.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
+ const BlockInfo &BI = UseBlocks[i];
+ if (BI.FirstUse == BI.LastUse)
+ continue;
+ Blocks.insert(BI.MBB);
+ }
+ return !Blocks.empty();
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastUse < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastUse);
+ }
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (Blocks.count(BI.MBB))
+ splitSingleBlock(BI);
+ }
+ finish();
+}
+
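+// Putting the pieces together, per-block splitting of a live interval LI is
+// driven roughly like this (a minimal sketch; SA and SE are assumed to be the
+// caller's SplitAnalysis and SplitEditor, and LRE a LiveRangeEdit for LI):
+//
+//   SA.analyze(&LI);
+//   SE.reset(LRE);
+//   SplitAnalysis::BlockPtrSet Blocks;
+//   if (SA.getMultiUseBlocks(Blocks))
+//     SE.splitSingleBlocks(Blocks);  // Calls finish() internally.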
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP;
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(Cut);
+ useIntv(Idx, Stop);
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastUse);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastUse < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastUse < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastUse);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastUse);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstUse, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+ useIntv(From, Idx);
+}
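One plausible per-block dispatch over the three methods above, shaped the way a global splitter would use them; the wrapper and the precomputed IntvIn/IntvOut/interference bounds are assumptions for illustration and are not taken from this patch.

#include "SplitKit.h"   // SplitAnalysis, SplitEditor (added by this patch)

using namespace llvm;

// For one use block, pick the appropriate splitting primitive. IntvIn/IntvOut
// are interval indexes from openIntv(); 0 means the value is on the stack on
// that side of the block.
static void splitUseBlock(SplitEditor &SE, const SplitAnalysis::BlockInfo &BI,
                          unsigned IntvIn, SlotIndex LeaveBefore,
                          unsigned IntvOut, SlotIndex EnterAfter) {
  if (!IntvIn && !IntvOut) {
    // Isolated block: keep the uses in their own local interval.
    SE.splitSingleBlock(BI);
    return;
  }
  if (IntvIn && IntvOut)
    // In a register on both sides; uses are ignored when placing splits.
    SE.splitLiveThroughBlock(BI.MBB->getNumber(),
                             IntvIn, LeaveBefore, IntvOut, EnterAfter);
  else if (IntvIn)
    // Enters in a register, leaves on the stack (or dies in the block).
    SE.splitRegInBlock(BI, IntvIn, LeaveBefore);
  else
    // Enters on the stack (or is defined here), leaves in a register.
    SE.splitRegOutBlock(BI, IntvOut, EnterAfter);
}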
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..7948b725f856
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,469 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstUse; ///< First instr using current reg.
+ SlotIndex LastUse; ///< Last instr using current reg.
+ bool LiveThrough; ///< Live in whole block (Templ 5. above).
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstUse, LastUse);
+ }
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI;
+
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
+ // Summarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
+ }
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having CurLI split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
+};
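A small, hypothetical consumer of the accessors above; the threshold it applies is an illustrative assumption, not a heuristic taken from this patch.

// Sketch: decide whether region splitting looks promising for VirtReg.
// Assumes #include "SplitKit.h" and using namespace llvm.
static bool looksWorthSplitting(SplitAnalysis &SA,
                                const LiveInterval &VirtReg) {
  SA.analyze(&VirtReg);
  if (SA.getNumLiveBlocks() <= 1)
    return false;                      // Purely local; nothing to carve up.
  // Many live-through blocks relative to use blocks suggest the value could
  // sit on the stack (or in another register) across large CFG regions.
  return SA.getNumThroughBlocks() >= SA.getUseBlocks().size();
}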
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Repeat from openIntv for each additional interval.
+/// - Rewrite instructions with finish().
+///
+class SplitEditor {
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
+ /// Each value is represented by a minimal live range at its def.
+ /// 3. A non-null VNInfo - the value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ // LiveOutCache - Map each basic block where a new register is live out to the
+ // live-out value and its defining block.
+ // One of these conditions shall be true:
+ //
+ // 1. !LiveOutCache.count(MBB)
+ // 2. LiveOutCache[MBB].second.getNode() == MBB
+ // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+ //
+ // This is only a cache, the values can be computed as:
+ //
+ // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+ // Node = MDT[LIS.getMBBFromIndex(VNI->def)]
+ //
+ // The cache is also used as a visited set by extendRange(). It can be shared
+ // by all the new registers because at most one is live out of each block.
+ LiveOutMap LiveOutCache;
+
+ // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
+ // entry in LiveOutCache.
+ BitVector LiveOutSeen;
+
+ /// LiveInBlock - Info for updateSSA() about a block where a register is
+ /// live-in.
+ /// The updateSSA() caller provides DomNode and Kill inside MBB; updateSSA()
+ /// adds the computed live-in value.
+ struct LiveInBlock {
+ // Dominator tree node for the block.
+ // Cleared by updateSSA when the final value has been determined.
+ MachineDomTreeNode *DomNode;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block.
+ SlotIndex Kill;
+
+ LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {}
+ };
+
+ /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and
+ /// updateSSA(). This list is usually empty; it exists here to avoid frequent
+ /// reallocations.
+ SmallVector<LiveInBlock, 16> LiveInBlocks;
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
+ /// of the number of defs.
+ void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
+ /// Insert PHIDefs as needed to preserve SSA form.
+ void extendRange(unsigned RegIdx, SlotIndex Idx);
+
+ /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all
+ /// reaching defs for LI are found.
+ /// @param LI Live interval whose value is needed.
+ /// @param MBB Block where LI should be live-in.
+ /// @param Kill Kill point in MBB.
+ /// @return Unique value seen, or NULL.
+ VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB,
+ SlotIndex Kill);
+
+ /// updateSSA - Compute and insert PHIDefs such that all blocks in
+ /// LiveInBlocks get a known live-in value. Add live ranges to the blocks.
+ void updateSSA();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals are appended to the LiveRangeEdit passed to reset().
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ MachineDominatorTree&);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use the open interval.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use the open interval.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+ /// basic block in Blocks.
+ void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
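To make the workflow described in the SplitEditor class comment concrete, here is a minimal sketch that carves one block's uses into a new interval by hand. It mirrors the simple branch of splitSingleBlock() and ignores the last-split-point and interference handling, so treat it as an illustration of the API rather than a drop-in replacement.

#include "SplitKit.h"   // SplitEditor, SplitAnalysis (this header)

using namespace llvm;

// openIntv -> enterIntv* -> useIntv -> leaveIntv* -> finish, as in the
// SplitEditor class comment.
static void splitUsesByHand(SplitEditor &SE,
                            const SplitAnalysis::BlockInfo &BI) {
  SE.openIntv();                                       // New interval index.
  SlotIndex Start = SE.enterIntvBefore(BI.FirstUse);   // COPY before 1st use.
  SlotIndex End   = SE.leaveIntvAfter(BI.LastUse);     // COPY after last use.
  SE.useIntv(Start, End);                              // Map the uses to it.
  SE.finish();                                         // Rewrite instructions.
}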
diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp
new file mode 100644
index 000000000000..ec75df4b7d1f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,827 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "RegisterCoalescer.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+
+namespace llvm {
+
+ class StartSlotComparator {
+ public:
+ StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+ bool operator()(const MachineBasicBlock *mbb1,
+ const MachineBasicBlock *mbb2) const {
+ return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+ }
+ private:
+ LiveIntervals &lis;
+ };
+
+ class LoopSplit {
+ public:
+ LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+ : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Cannot split physical registers.");
+ }
+
+ LiveInterval& getLI() const { return li; }
+
+ MachineLoop& getLoop() const { return loop; }
+
+ bool isValid() const { return valid; }
+
+ bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+ void invalidate() { valid = false; }
+
+ void splitIncoming() { inSplit = true; }
+
+ void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+ void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+ void apply() {
+ assert(valid && "Attempt to apply invalid split.");
+ applyIncoming();
+ applyOutgoing();
+ copyRanges();
+ renameInside();
+ }
+
+ private:
+ LoopSplitter &ls;
+ LiveInterval &li;
+ MachineLoop &loop;
+ bool valid, inSplit;
+ std::set<MachineLoop::Edge> outSplits;
+ std::vector<MachineInstr*> loopInstrs;
+
+ LiveInterval *newLI;
+ std::map<VNInfo*, VNInfo*> vniMap;
+
+ LiveInterval* getNewLI() {
+ if (newLI == 0) {
+ const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+ unsigned vreg = ls.mri->createVirtualRegister(trc);
+ newLI = &ls.lis->getOrCreateInterval(vreg);
+ }
+ return newLI;
+ }
+
+ VNInfo* getNewVNI(VNInfo *oldVNI) {
+ VNInfo *newVNI = vniMap[oldVNI];
+
+ if (newVNI == 0) {
+ newVNI = getNewLI()->createValueCopy(oldVNI,
+ ls.lis->getVNInfoAllocator());
+ vniMap[oldVNI] = newVNI;
+ }
+
+ return newVNI;
+ }
+
+ void applyIncoming() {
+ if (!inSplit) {
+ return;
+ }
+
+ MachineBasicBlock *preHeader = loop.getLoopPreheader();
+ if (preHeader == 0) {
+ assert(ls.canInsertPreHeader(loop) &&
+ "Can't insert required preheader.");
+ preHeader = &ls.insertPreHeader(loop);
+ }
+
+ LiveRange *preHeaderRange =
+ ls.lis->findExitingRange(li, preHeader);
+ assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+ // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader,
+ preHeader->getFirstTerminator(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(getNewLI()->reg, RegState::Define)
+ .addReg(li.reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+ newVal->def = copyDefIdx;
+ newVal->setCopy(copy);
+ li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+ getNewLI()->addRange(LiveRange(copyDefIdx,
+ ls.lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+ void applyOutgoing() {
+
+ for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+ osEnd = outSplits.end();
+ osItr != osEnd; ++osItr) {
+ MachineLoop::Edge edge = *osItr;
+ MachineBasicBlock *outBlock = edge.second;
+ if (ls.isCriticalEdge(edge)) {
+ assert(ls.canSplitEdge(edge) && "Unsplittable critical edge.");
+ outBlock = &ls.splitEdge(edge, loop);
+ }
+ LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+ assert(outRange != 0 && "No exiting range?");
+
+ MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(li.reg, RegState::Define)
+ .addReg(getNewLI()->reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ // Blow away output range definition.
+ outRange->valno->def = ls.lis->getInvalidIndex();
+ li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+ SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+ assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal =
+ getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
+
+ getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+ copyDefIdx, newVal));
+
+ }
+ }
+
+ void copyRange(LiveRange &lr) {
+ std::pair<bool, LoopSplitter::SlotPair> lsr =
+ ls.getLoopSubRange(lr, loop);
+
+ if (!lsr.first)
+ return;
+
+ LiveRange loopRange(lsr.second.first, lsr.second.second,
+ getNewVNI(lr.valno));
+
+ li.removeRange(loopRange.start, loopRange.end, true);
+
+ getNewLI()->addRange(loopRange);
+ }
+
+ void copyRanges() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+ if (instr.modifiesRegister(li.reg, 0)) {
+ LiveRange *defRange =
+ li.getLiveRangeContaining(instrIdx.getDefIndex());
+ if (defRange != 0) // May have caught this already.
+ copyRange(*defRange);
+ }
+ if (instr.readsRegister(li.reg, 0)) {
+ LiveRange *useRange =
+ li.getLiveRangeContaining(instrIdx.getUseIndex());
+ if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange);
+ }
+ }
+ }
+
+ for (MachineLoop::block_iterator bbItr = loop.block_begin(),
+ bbEnd = loop.block_end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock &loopBlock = **bbItr;
+ LiveRange *enteringRange =
+ ls.lis->findEnteringRange(li, &loopBlock);
+ if (enteringRange != 0) {
+ copyRange(*enteringRange);
+ }
+ }
+ }
+
+ void renameInside() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
+ MachineOperand &mop = instr.getOperand(i);
+ if (mop.isReg() && mop.getReg() == li.reg) {
+ mop.setReg(getNewLI()->reg);
+ }
+ }
+ }
+ }
+
+ };
+
+ void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addPreserved<RegisterCoalescer>();
+ au.addPreserved<CalculateSpillWeights>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ sis = &getAnalysis<SlotIndexes>();
+ lis = &getAnalysis<LiveIntervals>();
+ mli = &getAnalysis<MachineLoopInfo>();
+ mdt = &getAnalysis<MachineDominatorTree>();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
+
+ dumpOddTerminators();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+// std::deque<MachineLoop*> loops;
+// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+// dbgs() << "Loops:\n";
+// while (!loops.empty()) {
+// MachineLoop &loop = *loops.front();
+// loops.pop_front();
+// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+
+// dumpLoopInfo(loop);
+// }
+
+ //lis->dump();
+ //exit(0);
+
+ // Setup initial intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
+ !lis->intervalIsInOneMBB(*li)) {
+ intervals.push_back(li);
+ }
+ }
+
+ processIntervals();
+
+ intervals.clear();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+ dumpOddTerminators();
+
+ //exit(1);
+
+ return false;
+ }
+
+ void LoopSplitter::releaseMemory() {
+ fqn.clear();
+ intervals.clear();
+ loopRangeMap.clear();
+ }
+
+ void LoopSplitter::dumpOddTerminators() {
+ for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock *mbb = &*bbItr;
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ if (tii->AnalyzeBranch(*mbb, a, b, c)) {
+ dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
+ dbgs() << " Terminators:\n";
+ for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr *instr = &*iItr;
+ dbgs() << " " << *instr;
+ }
+ dbgs() << "\n Listed successors: [ ";
+ for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock *succMBB = *sItr;
+ dbgs() << succMBB->getNumber() << " ";
+ }
+ dbgs() << "]\n\n";
+ }
+ }
+ }
+
+ void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
+ MachineBasicBlock &headerBlock = *loop.getHeader();
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ loop.getExitEdges(exitEdges);
+
+ dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
+ for (std::vector<MachineBasicBlock*>::const_iterator
+ subBlockItr = loop.getBlocks().begin(),
+ subBlockEnd = loop.getBlocks().end();
+ subBlockItr != subBlockEnd; ++subBlockItr) {
+ MachineBasicBlock &subBlock = **subBlockItr;
+ dbgs() << "BB#" << subBlock.getNumber() << " ";
+ }
+ dbgs() << "], Exit edges: [ ";
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge &exitEdge = *exitEdgeItr;
+ dbgs() << "(MBB#" << exitEdge.first->getNumber()
+ << ", MBB#" << exitEdge.second->getNumber() << ") ";
+ }
+ dbgs() << "], Sub-Loop Headers: [ ";
+ for (MachineLoop::iterator subLoopItr = loop.begin(),
+ subLoopEnd = loop.end();
+ subLoopItr != subLoopEnd; ++subLoopItr) {
+ MachineLoop &subLoop = **subLoopItr;
+ MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
+ dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
+ mbb.updateTerminator();
+
+ for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
+ miItr != miEnd; ++miItr) {
+ if (lis->isNotInMIMap(miItr)) {
+ lis->InsertMachineInstrInMaps(miItr);
+ }
+ }
+ }
+
+ bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
+ MachineBasicBlock *header = loop.getHeader();
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+
+ for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
+ pbEnd = header->pred_end();
+ pbItr != pbEnd; ++pbItr) {
+ MachineBasicBlock *predBlock = *pbItr;
+ if (tii->AnalyzeBranch(*predBlock, a, b, c)) {
+ return false;
+ }
+ }
+
+ MachineFunction::iterator headerItr(header);
+ if (headerItr == mf->begin())
+ return true;
+ MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
+ assert(headerLayoutPred != 0 && "Header should have layout pred.");
+
+ return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
+ assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
+
+ MachineBasicBlock &header = *loop.getHeader();
+
+ // Save the preds - we'll need to update them once we insert the preheader.
+ typedef std::set<MachineBasicBlock*> HeaderPreds;
+ HeaderPreds headerPreds;
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (!loop.contains(*predItr))
+ headerPreds.insert(*predItr);
+ }
+
+ assert(!headerPreds.empty() && "No predecessors for header?");
+
+ //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
+
+ MachineBasicBlock *preHeader =
+ mf->CreateMachineBasicBlock(header.getBasicBlock());
+
+ assert(preHeader != 0 && "Failed to create pre-header.");
+
+ mf->insert(header, preHeader);
+
+ for (HeaderPreds::iterator hpItr = headerPreds.begin(),
+ hpEnd = headerPreds.end();
+ hpItr != hpEnd; ++hpItr) {
+ assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
+ MachineBasicBlock &hp = **hpItr;
+ hp.ReplaceUsesOfBlockWith(&header, preHeader);
+ }
+ preHeader->addSuccessor(&header);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(preHeader));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(preHeader);
+
+ if (MachineLoop *parentLoop = loop.getParentLoop()) {
+ assert(parentLoop->getHeader() != loop.getHeader() &&
+ "Parent loop has same header?");
+ parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (parentLoop != 0) {
+ loopRangeMap.erase(parentLoop);
+ parentLoop = parentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+
+ // Is this safe for physregs?
+ // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
+ if (!lis->isLiveInToMBB(li, &header))
+ continue;
+
+ if (lis->isLiveInToMBB(li, preHeader)) {
+ assert(lis->isLiveOutOfMBB(li, preHeader) &&
+ "Range terminates in newly added preheader?");
+ continue;
+ }
+
+ bool insertRange = false;
+
+ for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
+ predEnd = preHeader->pred_end();
+ predItr != predEnd; ++predItr) {
+ MachineBasicBlock *predMBB = *predItr;
+ if (lis->isLiveOutOfMBB(li, predMBB)) {
+ insertRange = true;
+ break;
+ }
+ }
+
+ if (!insertRange)
+ continue;
+
+ SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
+ lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+
+ //dbgs() << "Dumping SlotIndexes:\n";
+ //sis->dump();
+
+ //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
+
+ return *preHeader;
+ }
+
+ bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
+ assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
+ return edge.second->pred_size() > 1;
+ }
+
+ bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
+ MachineFunction::iterator outBlockItr(edge.second);
+ if (outBlockItr == mf->begin())
+ return true;
+ MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
+ assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
+ !tii->AnalyzeBranch(*edge.first, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
+ MachineLoop &loop) {
+
+ MachineBasicBlock &inBlock = *edge.first;
+ MachineBasicBlock &outBlock = *edge.second;
+
+ assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
+ "Splitting non-critical edge?");
+
+ //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
+ // << " -> MBB#" << outBlock.getNumber() << ")...";
+
+ MachineBasicBlock *splitBlock =
+ mf->CreateMachineBasicBlock();
+
+ assert(splitBlock != 0 && "Failed to create split block.");
+
+ mf->insert(&outBlock, splitBlock);
+
+ inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
+ splitBlock->addSuccessor(&outBlock);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(splitBlock));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(splitBlock);
+
+ loopRangeMap.erase(&loop);
+
+ MachineLoop *splitParentLoop = loop.getParentLoop();
+ while (splitParentLoop != 0 &&
+ !splitParentLoop->contains(&outBlock)) {
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+
+ if (splitParentLoop != 0) {
+ assert(splitParentLoop->contains(&loop) &&
+ "Split-block parent doesn't contain original loop?");
+ splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (splitParentLoop != 0) {
+ loopRangeMap.erase(splitParentLoop);
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+ }
+
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+ bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
+ lis->isLiveInToMBB(li, &outBlock);
+ if (lis->isLiveInToMBB(li, splitBlock)) {
+ if (!intersects) {
+ li.removeRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock), true);
+ }
+ } else if (intersects) {
+ SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+ lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock),
+ newVal));
+ }
+ }
+
+ //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
+
+ return *splitBlock;
+ }
+
+ LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+ typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+ LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+ if (lrItr == loopRangeMap.end()) {
+ LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+ std::copy(loop.block_begin(), loop.block_end(),
+ std::inserter(loopMBBs, loopMBBs.begin()));
+
+ assert(!loopMBBs.empty() && "No blocks in loop?");
+
+ LoopRanges &loopRanges = loopRangeMap[&loop];
+ assert(loopRanges.empty() && "Loop encountered but not processed?");
+ SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+ loopRanges.push_back(
+ std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+ lis->getInvalidIndex()));
+ for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+ curBlockEnd = loopMBBs.end();
+ curBlockItr != curBlockEnd; ++curBlockItr) {
+ SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+ if (newStart != oldEnd) {
+ loopRanges.back().second = oldEnd;
+ loopRanges.push_back(std::make_pair(newStart,
+ lis->getInvalidIndex()));
+ }
+ oldEnd = lis->getMBBEndIdx(*curBlockItr);
+ }
+
+ loopRanges.back().second =
+ lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+ return loopRanges;
+ }
+ return lrItr->second;
+ }
+
+ std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+ const LiveRange &lr,
+ MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ LoopRanges::iterator lrItr = loopRanges.begin(),
+ lrEnd = loopRanges.end();
+ while (lrItr != lrEnd && lr.start >= lrItr->second) {
+ ++lrItr;
+ }
+
+ if (lrItr == lrEnd) {
+ SlotIndex invalid = lis->getInvalidIndex();
+ return std::make_pair(false, SlotPair(invalid, invalid));
+ }
+
+ SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+ SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+ return std::make_pair(true, SlotPair(srStart, srEnd));
+ }
+
+ void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
+ for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
+ lrItr != lrEnd; ++lrItr) {
+ dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::processHeader(LoopSplit &split) {
+ MachineBasicBlock &header = *split.getLoop().getHeader();
+ //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
+
+ if (!lis->isLiveInToMBB(split.getLI(), &header))
+ return; // Not live in, but nothing wrong so far.
+
+ MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+ if (!preHeader) {
+
+ if (!canInsertPreHeader(split.getLoop())) {
+ split.invalidate();
+ return; // Couldn't insert a pre-header. Bail on this interval.
+ }
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+ split.splitIncoming();
+ break;
+ }
+ }
+ } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+ split.splitIncoming();
+ }
+ }
+
+ void LoopSplitter::processLoopExits(LoopSplit &split) {
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ split.getLoop().getExitEdges(exitEdges);
+
+ //dbgs() << " Processing loop exits:\n";
+
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+ LiveRange *outRange =
+ split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
+
+ if (outRange != 0) {
+ if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+ split.invalidate();
+ return;
+ }
+
+ split.splitOutgoing(exitEdge);
+ }
+ }
+ }
+
+ void LoopSplitter::processLoopUses(LoopSplit &split) {
+ std::set<MachineInstr*> processed;
+
+ for (MachineRegisterInfo::reg_iterator
+ rItr = mri->reg_begin(split.getLI().reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ MachineInstr &instr = *rItr;
+ if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+ split.addLoopInstr(&instr);
+ processed.insert(&instr);
+ }
+ }
+
+ //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
+ // << " in blocks [ ";
+ //dbgs() << "]\n";
+ }
+
+ bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Attempt to split physical register.");
+
+ LoopSplit split(*this, li, loop);
+ processHeader(split);
+ if (split.isValid())
+ processLoopExits(split);
+ if (split.isValid())
+ processLoopUses(split);
+ if (split.isValid() /* && split.isWorthwhile() */) {
+ split.apply();
+ DEBUG(dbgs() << "Success.\n");
+ return true;
+ }
+ DEBUG(dbgs() << "Failed.\n");
+ return false;
+ }
+
+ void LoopSplitter::processInterval(LiveInterval &li) {
+ std::deque<MachineLoop*> loops;
+ std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+ while (!loops.empty()) {
+ MachineLoop &loop = *loops.front();
+ loops.pop_front();
+ DEBUG(
+ dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+ << loop.getHeader()->getNumber() << " ";
+ );
+ if (!splitOverLoop(li, loop)) {
+ // Couldn't split over outer loop, schedule sub-loops to be checked.
+ std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+ }
+ }
+ }
+
+ void LoopSplitter::processIntervals() {
+ while (!intervals.empty()) {
+ LiveInterval &li = *intervals.front();
+ intervals.pop_front();
+
+ assert(!lis->intervalIsInOneMBB(li) &&
+ "Single interval in process worklist.");
+
+ processInterval(li);
+ }
+ }
+
+}
diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h
new file mode 100644
index 000000000000..9fb1b8b30139
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Splitter.h
@@ -0,0 +1,101 @@
+//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITTER_H
+#define LLVM_CODEGEN_SPLITTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ struct LiveRange;
+ class LoopSplit;
+ class MachineDominatorTree;
+ class MachineRegisterInfo;
+ class SlotIndexes;
+ class TargetInstrInfo;
+ class VNInfo;
+
+ class LoopSplitter : public MachineFunctionPass {
+ friend class LoopSplit;
+ public:
+ static char ID;
+
+ LoopSplitter() : MachineFunctionPass(ID) {
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+
+ private:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ MachineLoopInfo *mli;
+ MachineRegisterInfo *mri;
+ MachineDominatorTree *mdt;
+ SlotIndexes *sis;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ std::string fqn;
+ std::deque<LiveInterval*> intervals;
+
+ typedef std::pair<SlotIndex, SlotIndex> SlotPair;
+ typedef std::vector<SlotPair> LoopRanges;
+ typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
+ LoopRangeMap loopRangeMap;
+
+ void dumpLoopInfo(MachineLoop &loop);
+
+ void dumpOddTerminators();
+
+ void updateTerminators(MachineBasicBlock &mbb);
+
+ bool canInsertPreHeader(MachineLoop &loop);
+ MachineBasicBlock& insertPreHeader(MachineLoop &loop);
+
+ bool isCriticalEdge(MachineLoop::Edge &edge);
+ bool canSplitEdge(MachineLoop::Edge &edge);
+ MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
+
+ LoopRanges& getLoopRanges(MachineLoop &loop);
+ std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
+ MachineLoop &loop);
+
+ void dumpLoopRanges(MachineLoop &loop);
+
+ void processHeader(LoopSplit &split);
+ void processLoopExits(LoopSplit &split);
+ void processLoopUses(LoopSplit &split);
+
+ bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
+
+ void processInterval(LiveInterval &li);
+
+ void processIntervals();
+ };
+
+}
+
+#endif
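This patch registers LoopSplitter with INITIALIZE_PASS_BEGIN/END but adds no createLoopSplitter() factory, so the sketch below constructs the pass directly. The hook name and the assumption that a machine-level PassManagerBase is available at this point are illustrative only; how (or whether) the codegen pipeline actually schedules the pass is outside this diff.

#include "Splitter.h"            // LoopSplitter (added by this patch)
#include "llvm/PassManager.h"    // PassManagerBase

// Hypothetical hook that schedules the loop splitter; its required analyses
// (SlotIndexes, LiveIntervals, MachineLoopInfo, MachineDominatorTree) are
// pulled in through LoopSplitter::getAnalysisUsage().
static void addLoopSplittingPass(llvm::PassManagerBase &PM) {
  PM.add(new llvm::LoopSplitter());
}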
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 000000000000..d3cbd15b64e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,260 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+
+namespace {
+ class StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ Function *F;
+ Module *M;
+
+ DominatorTree* DT;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector() const;
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+ StackProtector(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ if (!RequiresStackProtector()) return false;
+
+ return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers larger than SSPBufferSize bytes.
+bool StackProtector::RequiresStackProtector() const {
+ if (F->hasFnAttr(Attribute::StackProtectReq))
+ return true;
+
+ if (!F->hasFnAttr(Attribute::StackProtect))
+ return false;
+
+ const TargetData *TD = TLI->getTargetData();
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation())
+ // This is a call to alloca with a variable size. Emit stack
+ // protectors.
+ return true;
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ // We apparently only care about character arrays.
+ if (!AT->getElementType()->isIntegerTy(8))
+ continue;
+
+ // If an array has at least SSPBufferSize bytes of allocated space,
+ // then we emit stack protectors.
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ Args, "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) : BB;
+ }
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ if (DT && FailBBDom)
+ DT->addNewBlock(FailBB, FailBBDom);
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+ "CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail",
+ Type::getVoidTy(F->getContext()), NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(F->getContext(), FailBB);
+ return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 000000000000..57cbe1ba5960
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,768 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+ cl::init(false), cl::Hidden,
+ cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ bool ColorWithRegs;
+ LiveStacks* LS;
+ VirtRegMap* VRM;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+ // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locates spill slots with smaller indices
+ // closest to the stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+ StackSlotColoring(bool RegColor) :
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char* getPassName() const {
+ return "Stack Slot Coloring";
+ }
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ bool PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg, const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF);
+ bool AllMemRefsCanBeUnfolded(int SS);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+ return new StackSlotColoring(RegColor);
+}
+
+namespace {
+ // IntervalSorter - Comparison predicate that sorts live intervals by
+ // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI->isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slot references with registers instead.
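+/// As a sketch of the intent: if every reference to a colored slot is a plain
+/// reload/spill or can be unfolded, and VirtRegMap still has an unused
+/// register of the right class, the slot's accesses are rewritten to use that
+/// register and the now-empty slot can be removed.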
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg) {
+ if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+ return false;
+
+ bool Changed = false;
+ DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ if (!UsedColors[SS] || li->weight < 20)
+ // If the weight is < 20, i.e. two references in a loop with depth 1,
+ // don't bother with it.
+ continue;
+
+ // These slots are allowed to share the same registers.
+ bool AllColored = true;
+ SmallVector<unsigned, 4> ColoredRegs;
+ for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+ int RSS = RevMap[SS][j];
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+ // If it's not colored to another stack slot, try coloring it
+ // to a "free" register.
+ if (!RC) {
+ AllColored = false;
+ continue;
+ }
+ unsigned Reg = VRM->getFirstUnusedRegister(RC);
+ if (!Reg) {
+ AllColored = false;
+ continue;
+ }
+ if (!AllMemRefsCanBeUnfolded(RSS)) {
+ AllColored = false;
+ continue;
+ } else {
+ DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
+ << TRI->getName(Reg) << '\n');
+ ColoredRegs.push_back(Reg);
+ SlotMapping[RSS] = Reg;
+ SlotIsReg.set(RSS);
+ Changed = true;
+ }
+ }
+
+ // Register and its sub-registers are no longer free.
+ while (!ColoredRegs.empty()) {
+ unsigned Reg = ColoredRegs.back();
+ ColoredRegs.pop_back();
+ VRM->setRegisterUsed(Reg);
+ // If reg is a callee-saved register, it will have to be spilled in
+ // the prologue.
+ MRI->setPhysRegUsed(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ VRM->setRegisterUsed(*AS);
+ MRI->setPhysRegUsed(*AS);
+ }
+ }
+ // This spill slot is dead after the rewrites
+ if (AllColored) {
+ MFI->RemoveStackObject(SS);
+ ++NumEliminated;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+
+ return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
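+/// For example, if fi#4's interval does not overlap any interval already
+/// assigned to color fi#1, fi#4 reuses fi#1 and fi#1's size and alignment are
+/// grown to cover both objects; otherwise fi#4 takes the next unused color.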
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector SlotIsReg(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // Can we "color" a stack slot with an unused register?
+ Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ bool isReg = SlotIsReg[SS];
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ continue;
+
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ if (!isReg)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ else {
+ // Rewrite to use a register instead.
+ unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+ SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+ UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+ }
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = RefMIs[i];
+ if (TII->isLoadFromStackSlot(MI, SS) ||
+ TII->isStoreToStackSlot(MI, SS))
+ // Restore and spill will become copies.
+ return true;
+ if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+ return false;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (MO.isFI() && MO.getIndex() != SS)
+ // If it uses another frameindex, we can't currently unfold it.
+ return false;
+ }
+ }
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the memory references. This changes the MachineMemOperands
+ // directly. They may be in use by multiple instructions, however all
+ // instructions using OldFI are being rewritten to use NewFI.
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I)
+ if ((*I)->getValue() == OldSV)
+ (*I)->setValue(NewSV);
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
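+/// (Used below when a spill to a colored slot is rewritten to a register: if
+/// the stored value is killed by the spill, its def is renamed to NewReg so
+/// the copy/store can be elided.)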
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->begin())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ SmallVector<MachineOperand*, 4> Refs;
+ while (--MII != MBB->begin()) {
+ bool FoundDef = false; // Not counting 2address def.
+
+ Uses.clear();
+ const MCInstrDesc &MCID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isImplicit())
+ return false;
+
+ // Abort if the use is actually a sub-register def. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg() || MII->isSubregToReg())
+ return false;
+
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+
+ if (MO.isUse()) {
+ Uses.push_back(&MO);
+ } else {
+ Refs.push_back(&MO);
+ if (!MII->isRegTiedToUseOperand(i))
+ FoundDef = true;
+ }
+ } else if (TRI->regsOverlap(Reg, NewReg)) {
+ return false;
+ } else if (TRI->regsOverlap(Reg, OldReg)) {
+ if (!MO.isUse() || !MO.isKill())
+ return false;
+ }
+ }
+
+ if (FoundDef) {
+ // Found non-two-address def. Stop here.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+ Refs[i]->setReg(NewReg);
+ return true;
+ }
+
+ // Two-address uses must be updated as well.
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Refs.push_back(Uses[i]);
+ }
+ return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->end())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ while (++MII != MBB->end()) {
+ bool FoundKill = false;
+ const MCInstrDesc &MCID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isDef() || MO.isImplicit())
+ return false;
+
+ // Abort if the use is actually a sub-register use. We don't have enough
+ // information to figure out if it is really legal.
+ if (MO.getSubReg())
+ return false;
+
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+ if (RC && !RC->contains(NewReg))
+ return false;
+ if (MO.isKill())
+ FoundKill = true;
+
+ Uses.push_back(&MO);
+ } else if (TRI->regsOverlap(Reg, NewReg) ||
+ TRI->regsOverlap(Reg, OldReg))
+ return false;
+ }
+ if (FoundKill) {
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Uses[i]->setReg(NewReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
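+/// Sketch of the cases handled below: a reload from OldFI becomes a COPY from
+/// Reg (or is deleted if the value can be propagated forward), a spill to
+/// OldFI becomes a COPY into Reg (or is deleted via backward propagation),
+/// and any other folded reference is unfolded with unfoldMemoryOperand so it
+/// operates on Reg directly.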
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg,
+ const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF) {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+ if (PropagateForward(MI, MBB, DstReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated load: ");
+ DEBUG(MI->dump());
+ ++NumLoadElim;
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DstReg).addReg(Reg);
+ ++NumRegRepl;
+ }
+
+ if (!Defs.count(Reg)) {
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+ if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+ DEBUG(dbgs() << "Eliminated store: ");
+ DEBUG(MI->dump());
+ ++NumStoreElim;
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(SrcReg);
+ ++NumRegRepl;
+ }
+
+ // Remember reg has been defined in MBB.
+ Defs.insert(Reg);
+ } else {
+ SmallVector<MachineInstr*, 4> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+ (void)Success; // Silence compiler warning.
+ assert(Success && "Failed to unfold!");
+ MachineInstr *NewMI = NewMIs[0];
+ MBB->insert(MI, NewMI);
+ ++NumRegRepl;
+
+ if (NewMI->readsRegister(Reg)) {
+ if (!Defs.count(Reg))
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ }
+ MBB->erase(MI);
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
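+/// For example (pseudo machine code), in the pair
+///
+///   %r0 = LOAD <fi#3>
+///   STORE <fi#3>, %r0
+///
+/// the store writes back the value just loaded and is deleted; if the store
+/// was also the last use (kill) of %r0, the load is deleted as well.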
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n';
+ });
+
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ LS = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots < 2) {
+ if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+ // Nothing to do!
+ return false;
+ }
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.callsSetJmp())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 000000000000..227eb47e6827
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,829 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
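+//
+// As a rough illustration: for "x3 = phi(x1 from BB1, x2 from BB2)", a naive
+// lowering would insert a copy to x3 at the end of both BB1 and BB2. This
+// pass instead places x1, x2 and x3 in one congruence class and only
+// materializes copies for members that are later proven to interfere
+// (isolated).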
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+ /// pointer. One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
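+ ///
+ /// Encoding sketch: parent is a PointerIntPair<Node*, 2>, so
+ /// parent.getPointer() is the union-find parent while parent.getInt()
+ /// holds kRegisterIsolatedFlag and/or kPHIIsolatedFlag in its two low bits.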
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
+ };
+
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
+
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
+
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a MachineInstr* and a virtual register, which
+ // is the operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+ }
+
+ LiveIntervals *LI;
+ };
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
+ }
+ }
+ return NULL;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+ }
+ }
+ }
+
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
+
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
+ }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+ }
+ }
+ }
+
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
+ }
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
+ }
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
+ }
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
+ }
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
+ }
+
+ LI->renumber();
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ if (RegNodeMap.count(Reg))
+ return;
+ RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
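+/// getLeader - Return the representative of this node's congruence class,
+/// compressing the traversed path (re-pointing nodes at their grandparents)
+/// along the way.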
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
+ }
+
+ return Parent;
+}
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
+ }
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
+ }
+ return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
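+/// A tiny worked example (a and b are virtual registers in one congruence
+/// class): if def(a) dominates def(b) and a is still live at def(b), the two
+/// interfere and b is isolated while a stays the current dominating parent;
+/// if they do not interfere, b becomes the current dominating parent and a is
+/// remembered as b's immediate dominating parent, to be restored when the
+/// traversal leaves b's subtree.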
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
+ } else {
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
+ }
+ }
+ }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because a PHI's use of each operand is logically contained
+ // in the corresponding predecessor block. The def of a PHI's destination
+ // register is processed along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
+ }
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
+ }
+ }
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+ assert(SrcVNI);
+ SrcVNI->setHasPHIKill(true);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
+ } else {
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
+ }
+
+ if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+ InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
+ }
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
+ }
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->setIsPHIDef(true);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getDefIndex(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ CopyInstr,
+ LI->getVNInfoAllocator());
+ CopyVNI->setIsPHIDef(true);
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getDefIndex(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to adjust for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getDefIndex();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
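+ // Create a copy of the value number the first time it is seen; subsequent
+ // ranges carrying the same value number reuse that copy.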
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
+ }
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
+}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 000000000000..6b801cbf6e1e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,938 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTails , "Number of tails duplicated");
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs , "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
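+// Each AvailableValsTy entry is a (predecessor block, new virtual register)
+// pair recording where a value of the original register is available.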
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ bool PreRegAlloc;
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Tail Duplication"; }
+
+ private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &UsedByPhi,
+ bool Remove);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
+ return new TailDuplicatePass(PreRegAlloc);
+}
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChange = false;
+ while (TailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
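+/// VerifyPHIs - Sanity-check the PHI nodes in every non-entry block: each PHI
+/// must have an incoming entry for every predecessor and, if CheckExtra is
+/// set, no entries from blocks that are not predecessors.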
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
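+/// isDefLiveOut - Return true if Reg has a non-debug use outside BB, i.e. its
+/// value is needed beyond this block.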
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
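+/// getPHISrcRegOpIdx - Return the operand index of the PHI source register
+/// coming from SrcBB, or 0 if SrcBB is not a source of MI (operand 0 is the
+/// PHI's def, so 0 never denotes a source).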
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update the SSA
+/// update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Insert a copy from source to the end of the block. The def register is the
+ // available value liveout of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end())
+ MO.setReg(VI->second);
+ }
+ }
+ PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument
+ // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(Reg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+bool
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
+ ++I) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->getDesc().isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->getDesc().isCall())
+ return false;
+
+ if (!I->isPHI() && !I->isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->getDesc().isUnconditionalBranch();
+}
+
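+/// bothUsedInPHI - Return true if some successor of A is also in SuccsB and
+/// begins with a PHI node.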
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
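+ // Skip predecessors that have a landing-pad successor; we do not rewrite
+ // their branches.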
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect branches that targeted TailBB to NewTarget.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ while (I != TailBB->end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+
+ // Simplify
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+
+ // Handle the nasty case in that we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 000000000000..86e71d8ccbb6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,443 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which just provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ bool ChangeReg0 = false;
+ if (HasDef && MI->getOperand(0).getReg() == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = HasDef
+ ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+ bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ if (HasDef)
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ else
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = MCID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+
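+/// PredicateInstruction - The default implementation rewrites each predicate
+/// operand of MI in place with the corresponding operand from Pred and returns
+/// true if any operand was changed.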
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
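+/// reMaterialize - The default implementation clones Orig, rewrites the
+/// clone's definition to DestReg (with subregister index SubIdx), and inserts
+/// the clone before I.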
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
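+/// produceSameValue - By default, two instructions are considered to produce
+/// the same value if they are identical, ignoring virtual register defs.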
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->getDesc().isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // Ask the target to do the actual folding.
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ NewMI = MBB.insert(MI, NewMI);
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.def_empty(Reg))
+ return false;
+ BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+ if (AllocatableRegs.test(Reg))
+ return false;
+ // Check for a def among the register's aliases too.
+ for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (!MRI.def_empty(AliasReg))
+ return false;
+ if (AllocatableRegs.test(AliasReg))
+ return false;
+ }
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const{
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 000000000000..a3c562013b59
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,1160 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ DataRelSection(0),
+ DataRelLocalSection(0),
+ DataRelROSection(0),
+ DataRelROLocalSection(0),
+ MergeableConst4Section(0),
+ MergeableConst8Section(0),
+ MergeableConst16Section(0) {
+}
+
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ BSSSection =
+ getContext().getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ getContext().getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ getContext().getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+
+ TLSBSSSection =
+ getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+
+ DataRelSection =
+ getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+ return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
+
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+  case dwarf::DW_EH_PE_absptr:
+    return Mang->getSymbol(GV);
+  case dwarf::DW_EH_PE_pcrel: {
+    return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+                                          Mang->getSymbol(GV)->getName());
+  }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(8);
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ unsigned Size = TM.getTargetData()->getPointerSize();
+ Streamer.EmitSymbolValue(Sym, Size);
+}
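+// Illustrative sketch of what emitPersonalityValue() above produces (exact
+// directive spelling and order depend on the streamer; __gxx_personality_v0 is
+// just an example personality): a hidden, weak, 8-byte object in its own
+// COMDAT group, holding the personality address, roughly:
+//
+//   .section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
+//   .hidden DW.ref.__gxx_personality_v0
+//   .weak   DW.ref.__gxx_personality_v0
+//   .align  8
+//   .type   DW.ref.__gxx_personality_v0, @object
+//   .size   DW.ref.__gxx_personality_v0, 8
+// DW.ref.__gxx_personality_v0:
+//   .quad   __gxx_personality_v0        (pointer-sized; .quad on 64-bit targets)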
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+  // N.B.: The defaults used here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
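+// For example, a global that names an explicit ".tbss.mine" section (a
+// hypothetical name) is classified as ThreadBSS here, so the explicit section
+// is emitted as SHT_NOBITS with the TLS flag set (see getELFSectionType() and
+// getELFSectionFlags() below).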
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
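+// For example (derived from the mapping above): a ThreadData kind yields
+// SHF_ALLOC | SHF_WRITE | SHF_TLS, matching the .tdata section created in
+// Initialize(), and a Mergeable1ByteCString kind yields
+// SHF_ALLOC | SHF_MERGE | SHF_STRINGS, matching the .rodata.str1.* sections
+// chosen in SelectSectionForGlobal().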
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
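+// For example, with -ffunction-sections a function whose mangled symbol is
+// 'foo' ends up in '.text.foo', and with -fdata-sections a relocation-free
+// writable global 'bar' ends up in '.data.bar' ('foo' and 'bar' are just
+// illustrative names; SelectSectionForGlobal() below appends the mangled
+// symbol name to the prefix returned here).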
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon() && !Kind.isBSS()) {
+    const char *Prefix = getSectionPrefixForGlobal(Kind);
+
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ Flags, Kind, 0, Group);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ unsigned Encoding, MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
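+// Illustrative example: with an indirect, pc-relative encoding, a reference to
+// a type-info global such as _ZTIi is lowered to a pc-relative reference to a
+// stub symbol '_ZTIi.DW.stub' (the '.DW.stub' suffix comes from the code
+// above), and the stub entry recorded in MachineModuleInfoELF is what the
+// AsmPrinter later emits as a pointer-sized slot holding the address of _ZTIi.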
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ TLSTLVSection(0),
+ TLSThreadInitSection(0),
+ CStringSection(0),
+ UStringSection(0),
+ TextCoalSection(0),
+ ConstTextCoalSection(0),
+ ConstDataSection(0),
+ DataCoalSection(0),
+ DataCommonSection(0),
+ DataBSSSection(0),
+ FourByteConstantSection(0),
+ EightByteConstantSection(0),
+ SixteenByteConstantSection(0),
+ LazySymbolPointerSection(0),
+ NonLazySymbolPointerSection(0) {
+}
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ IsFunctionEHFrameSymbolPrivate = false;
+ SupportsWeakOmittedEHFrame = false;
+
+ // .comm doesn't support alignment before Leopard.
+ Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+ CommDirectiveSupportsAlignment = false;
+
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ TextSection // .text
+ = getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = getContext().getMachOSection("__DATA", "__data", 0,
+ SectionKind::getDataRel());
+
+ TLSDataSection // .tdata
+ = getContext().getMachOSection("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getDataRel());
+ TLSBSSSection // .tbss
+ = getContext().getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ SectionKind::getThreadBSS());
+
+ // TODO: Verify datarel below.
+ TLSTLVSection // .tlv
+ = getContext().getMachOSection("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getDataRel());
+
+ TLSThreadInitSection
+ = getContext().getMachOSection("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = getContext().getMachOSection("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = getContext().getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = getContext().getMachOSection("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = getContext().getMachOSection("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getTargetData()->getPointerSize() == 32)
+ SixteenByteConstantSection = // .literal16
+ getContext().getMachOSection("__TEXT", "__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = getContext().getMachOSection("__TEXT", "__const", 0,
+ SectionKind::getReadOnly());
+
+ TextCoalSection
+ = getContext().getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = getContext().getMachOSection("__TEXT", "__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getReadOnly());
+ ConstDataSection // .const_data
+ = getContext().getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = getContext().getMachOSection("__DATA","__datacoal_nt",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+ DataCommonSection
+ = getContext().getMachOSection("__DATA","__common",
+ MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+ DataBSSSection
+ = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+
+ LazySymbolPointerSection
+ = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = getContext().getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection
+ = getContext().getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getContext().getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = getContext().getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getContext().getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
+
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ CompactUnwindSection =
+ getContext().getMachOSection("__LD", "__compact_unwind",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAbbrevSection =
+ getContext().getMachOSection("__DWARF", "__debug_abbrev",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getMachOSection("__DWARF", "__debug_info",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getMachOSection("__DWARF", "__debug_frame",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getMachOSection("__DWARF", "__debug_pubtypes",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getMachOSection("__DWARF", "__debug_str",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getMachOSection("__DWARF", "__debug_loc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getMachOSection("__DWARF", "__debug_aranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getMachOSection("__DWARF", "__debug_ranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getMachOSection("__DWARF", "__debug_macinfo",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ getContext().getMachOSection("__DWARF", "__debug_inlined",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
+ TLSExtraDataSection = TLSTLVSection;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
+ return getContext().getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' has an invalid section specifier '" + GV->getSection()+
+ "': " + ErrorCode + ".");
+ // Fall back to dropping it into the data section.
+ return DataSection;
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+  // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+  // __DATA,__common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+  // Put zero initialized globals with local linkage in the __DATA,__bss
+  // section with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
+ : TargetLoweringObjectFile(),
+ DrectveSection(0),
+ PDataSection(0),
+ XDataSection(0) {
+}
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ TextSection =
+ getContext().getCOFFSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection =
+ getContext().getCOFFSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection =
+ getContext().getCOFFSection(".rdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticCtorSection =
+ getContext().getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getCOFFSection(".dtors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getContext().getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ // Debug info.
+ DwarfAbbrevSection =
+ getContext().getCOFFSection(".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getContext().getCOFFSection(".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getContext().getCOFFSection(".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getContext().getCOFFSection(".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getContext().getCOFFSection(".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getContext().getCOFFSection(".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getContext().getCOFFSection(".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getContext().getCOFFSection(".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getContext().getCOFFSection(".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getContext().getCOFFSection(".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getContext().getCOFFSection(".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DrectveSection =
+ getContext().getCOFFSection(".drectve",
+ COFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
+
+ PDataSection =
+ getContext().getCOFFSection(".pdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ XDataSection =
+ getContext().getCOFFSection(".xdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
+ return getContext().getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection(
+ StringRef suffix) const {
+ if (suffix == "")
+ return PDataSection;
+ return getContext().getCOFFSection((".pdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection(
+ StringRef suffix) const {
+ if (suffix == "")
+ return XDataSection;
+ return getContext().getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+  else if (K.isBSS())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
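+// For example (derived from the mapping above): a ReadOnly kind yields
+// IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, matching the .rdata
+// section set up in Initialize(), while a writable DataRel kind additionally
+// gets IMAGE_SCN_MEM_WRITE.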
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return getContext().getCOFFSection(GV->getSection(),
+ getCOFFSectionFlags(Kind),
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+  if (Kind.isBSS())
+ return ".bss$";
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
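+// For example, a linkonce/weak function whose mangled symbol is '_foo' (an
+// illustrative name; the leading mangling character is dropped below) is
+// placed in '.text$foo' and marked IMAGE_SCN_LNK_COMDAT with
+// IMAGE_COMDAT_SELECT_ANY, so the linker keeps a single copy.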
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ return getDataSection();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 000000000000..6d6244e4f879
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1530 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class TwoAddressInstructionPass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ AliasAnalysis *AA;
+
+    // DistanceMap - Keep track of the distance of an MI from the start of
+    // the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies from physical
+ // registers to virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which
+ // are likely targets to be coalesced to due to copies to physical
+ // registers from virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+    /// RegSequences - Keep track of the list of REG_SEQUENCE instructions
+    /// seen during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences;
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
+
+ typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
+ bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist);
+ bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi, unsigned Dist);
+
+ bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
+ /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+    /// of the de-ssa process. This replaces the sources of a REG_SEQUENCE
+    /// with sub-register references of the register it defines.
+ bool EliminateRegSequences();
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const MCInstrDesc &MCID = UseMI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+  // No other uses in the same block; remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy, insert_subreg or
+/// subreg_to_reg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
+
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (llvm::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic
+/// block use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
+ return 0;
+ MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+ ((!FromRegC && !ToRegC) ||
+ regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+  // Since there are no intervening uses of either register, commute if the
+  // def of regC is closer; its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+  // If the commute created a new instruction, update live variables.
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
+ }
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+ }
+
+ return false;
+}
+
+/// ScanUses - Scan forward recursively looking only at uses; update the maps
+/// if a use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ ScanUses(DstReg, MBB, Processed);
+ }
+
+ Processed.insert(MI);
+ return;
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI,
+ const TargetInstrInfo *TII,
+ SmallVector<unsigned, 4> &Kills) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall())
+ return false;
+ if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ return false;
+
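+ // Every def must already be marked dead; collect the kill flags on the uses
+ // so the caller can transfer them to earlier instructions.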
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ Kills.push_back(MO.getReg());
+ }
+ return true;
+}
+
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist. If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass::
+canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+ SmallVector<NewKill, 4> &NewKills,
+ MachineBasicBlock *MBB, unsigned Dist) {
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill))
+ return false;
+
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
+ if (!LastKill)
+ return false;
+
+ bool isModRef = LastKill->definesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+ LastKill));
+ }
+ return true;
+}
+
+/// DeleteUnusedInstr - If an instruction with a tied register operand can
+/// be safely deleted, just delete it.
+bool
+TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned Dist) {
+ // Check if the instruction has no side effects and if all its defs are dead.
+ SmallVector<unsigned, 4> Kills;
+ if (!isSafeToDelete(mi, TII, Kills))
+ return false;
+
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ SmallVector<NewKill, 4> NewKills;
+ if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
+ return false;
+
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+ }
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ return true;
+}
+
+/// TryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if the tied operands
+/// are eliminated altogether.
+bool TwoAddressInstructionPass::
+TryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ const MCInstrDesc &MCID = mi->getDesc();
+ unsigned regA = mi->getOperand(DstIdx).getReg();
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ bool regBKilled = isKilled(*mi, regB, MRI, TII);
+ if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
+ ++NumDeletes;
+ return true; // Done with this instruction.
+ }
+
+ // Check if it is profitable to commute the operands.
+ unsigned SrcOp1, SrcOp2;
+ unsigned regC = 0;
+ unsigned regCIdx = ~0U;
+ bool TryCommute = false;
+ bool AggressiveCommute = false;
+ if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (SrcIdx == SrcOp1)
+ regCIdx = SrcOp2;
+ else if (SrcIdx == SrcOp2)
+ regCIdx = SrcOp1;
+
+ if (regCIdx != ~0U) {
+ regC = mi->getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ // If C dies but B does not, swap the B and C operands.
+ // This makes the live ranges of A and C joinable.
+ TryCommute = true;
+ else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ TryCommute = true;
+ AggressiveCommute = true;
+ }
+ }
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return false;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
+
+ if (MCID.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+ // because it's preferable to schedule a load rather than a register copy.
+ if (MCID.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ MachineFunction &MF = *mbbi->getParent();
+
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ const TargetRegisterClass *RC =
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ mbbi->insert(mi, NewMIs[0]);
+ mbbi->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformSuccess =
+ TryInstructionTransform(NewMI, mi, mbbi,
+ NewSrcIdx, NewDstIdx, Dist, Processed);
+ if (TransformSuccess ||
+ NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+ mi->eraseFromParent();
+ mi = NewMIs[1];
+ if (TransformSuccess)
+ return true;
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n");
+ const TargetMachine &TM = MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+
+ // ReMatRegs - Keep track of the registers whose defs are remat'ed.
+ BitVector ReMatRegs(MRI->getNumVirtRegs());
+
+ typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
+ TiedOperandMap;
+ TiedOperandMap TiedOperands(4);
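+ // Maps a tied source register to the (use index, def index) operand pairs
+ // that reference it within the current instruction.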
+
+ SmallPtrSet<MachineInstr*, 8> Processed;
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Remember REG_SEQUENCE instructions; we'll deal with them later.
+ if (mi->isRegSequence())
+ RegSequences.push_back(&*mi);
+
+ const MCInstrDesc &MCID = mi->getDesc();
+ bool FirstTied = true;
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ ProcessCopy(&*mi, &*mbbi, Processed);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ unsigned NumOps = mi->isInlineAsm()
+ ? mi->getNumOperands() : MCID.getNumOperands();
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+
+ if (FirstTied) {
+ FirstTied = false;
+ ++NumTwoAddressInstrs;
+ DEBUG(dbgs() << '\t' << *mi);
+ }
+
+ assert(mi->getOperand(SrcIdx).isReg() &&
+ mi->getOperand(SrcIdx).getReg() &&
+ mi->getOperand(SrcIdx).isUse() &&
+ "two address instruction invalid");
+
+ unsigned regB = mi->getOperand(SrcIdx).getReg();
+ TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+
+ // If the registers are already equal, nothing needs to be done.
+ if (mi->getOperand(SrcIdx).getReg() ==
+ mi->getOperand(DstIdx).getReg())
+ break; // Done with this instruction.
+
+ if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed))
+ break; // The tied operands have been eliminated.
+ }
+
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned regB = OI->first;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = mi->getOperand(DstIdx);
+ unsigned regA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab regB from the instruction because it may have changed if the
+ // instruction was commuted.
+ regB = mi->getOperand(SrcIdx).getReg();
+
+ if (regA == regB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = regA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !mi->getOperand(i).isReg() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // Emit a copy or rematerialize the definition.
+ const TargetRegisterClass *rc = MRI->getRegClass(regB);
+ MachineInstr *DefMI = MRI->getVRegDef(regB);
+ // If it's safe and profitable, remat the definition instead of
+ // copying it.
+ if (DefMI &&
+ DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isSafeToReMat(TII, AA, regB) &&
+ isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
+ unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
+ ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
+ ++NumReMats;
+ } else {
+ BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ regA).addReg(regB);
+ }
+
+ MachineBasicBlock::iterator prevMI = prior(mi);
+ // Update DistanceMap.
+ DistanceMap.insert(std::make_pair(prevMI, Dist));
+ DistanceMap[mi] = ++Dist;
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
+
+ MachineOperand &MO = mi->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(regA);
+ }
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
+ LV->addVirtualRegisterKilled(regB, prior(mi));
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+
+ // Schedule the source copy / remat inserted to form the two-address
+ // instruction. FIXME: Does it matter that the distance map may not be
+ // accurate after it's scheduled?
+ TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ // Some remat'ed instructions are dead.
+ for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->use_nodbg_empty(VReg)) {
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
+ // SSA form. It's now safe to de-SSA.
+ MadeChange |= EliminateRegSequences();
+
+ return MadeChange;
+}
+
+static void UpdateRegSequenceSrcs(unsigned SrcReg,
+ unsigned DstReg, unsigned SubIdx,
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo &TRI) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ MO.substVirtReg(DstReg, SubIdx, TRI);
+ }
+}
+
+/// CoalesceExtSubRegs - If a number of the REG_SEQUENCE's sources are
+/// EXTRACT_SUBREGs from the same register into the same virtual register,
+/// just with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+ unsigned DstReg) {
+ SmallSet<unsigned, 4> Seen;
+ for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+ unsigned SrcReg = Srcs[i];
+ if (!Seen.insert(SrcReg))
+ continue;
+
+ // Check that the instructions are all in the same basic block.
+ MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
+ if (SrcDefMI->getParent() != DstDefMI->getParent())
+ continue;
+
+ // If there are no other uses than copies which feed into
+ // the reg_sequence, then we might be able to coalesce them.
+ bool CanCoalesce = true;
+ SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
+ CanCoalesce = false;
+ break;
+ }
+ SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
+ DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
+ }
+
+ if (!CanCoalesce || SrcSubIndices.size() < 2)
+ continue;
+
+ // Check that the source subregisters can be combined.
+ std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
+ unsigned NewSrcSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
+ NewSrcSubIdx))
+ continue;
+
+ // Check that the destination subregisters can also be combined.
+ std::sort(DstSubIndices.begin(), DstSubIndices.end());
+ unsigned NewDstSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
+ NewDstSubIdx))
+ continue;
+
+ // If neither source nor destination can be combined to the full register,
+ // just give up. This could be improved if it ever matters.
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
+ continue;
+
+ // Now that we know that all the uses are extract_subregs and that those
+ // subregs can somehow be combined, scan all the extract_subregs again to
+ // make sure the subregs are in the right order and can be composed.
+ MachineInstr *SomeMI = 0;
+ CanCoalesce = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ assert(UseMI->isCopy());
+ unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
+ unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
+ assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
+ if ((NewDstSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
+ (NewSrcSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
+ CanCoalesce = false;
+ break;
+ }
+ // Keep track of one of the uses.
+ SomeMI = UseMI;
+ }
+ if (!CanCoalesce)
+ continue;
+
+ // Insert a copy to replace the original.
+ MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
+ SomeMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(SrcReg, 0, NewSrcSubIdx);
+
+ // Remove all the old extract instructions.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI == CopyMI)
+ continue;
+ assert(UseMI->isCopy());
+ // Move any kills to the new copy or extract instruction.
+ if (UseMI->getOperand(1).isKill()) {
+ CopyMI->getOperand(1).setIsKill();
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
+ }
+ UseMI->eraseFromParent();
+ }
+ }
+}
+
+static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
+ MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI != RegSeq && UseMI->isRegSequence())
+ return true;
+ }
+ return false;
+}
+
+/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// of the de-SSA process. This replaces the sources of a REG_SEQUENCE with
+/// sub-register references of the register defined by the REG_SEQUENCE. e.g.
+///
+/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
+/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
+/// =>
+/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+bool TwoAddressInstructionPass::EliminateRegSequences() {
+ if (RegSequences.empty())
+ return false;
+
+ for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
+ MachineInstr *MI = RegSequences[i];
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ bool IsImpDef = true;
+ SmallVector<unsigned, 4> RealSrcs;
+ SmallSet<unsigned, 4> Seen;
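+ // REG_SEQUENCE operands come in (source register, sub-register index) pairs
+ // after the def, hence the stride-2 walk below.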
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ if (MI->getOperand(i).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ DefMI->eraseFromParent();
+ continue;
+ }
+ IsImpDef = false;
+
+ // Remember COPY sources. These might be candidates for coalescing.
+ if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+ RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
+ bool isKill = MI->getOperand(i).isKill();
+ if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+ !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+ MRI->getRegClass(SrcReg), SubIdx)) {
+ // REG_SEQUENCE cannot have duplicated operands, so add a copy.
+ // Also add a copy if the source is live-in to the block. We don't want
+ // to end up with a partial redef of a live-in, e.g.
+ // BB0:
+ // reg1051:10<def> =
+ // ...
+ // BB1:
+ // ... = reg1051:10
+ // BB2:
+ // reg1051:9<def> =
+ // LiveIntervalAnalysis won't like it.
+ //
+ // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+ // correctly up to date becomes very difficult. Insert a copy.
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ isKill = false;
+ break;
+ }
+
+ MachineBasicBlock::iterator InsertLoc = MI;
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
+ MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ MI->getOperand(i).setReg(0);
+ if (LV && isKill)
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (!SrcReg) continue;
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
+ }
+
+ if (IsImpDef) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
+
+ // Try coalescing some EXTRACT_SUBREG instructions. This can create
+ // INSERT_SUBREG instructions that must have <undef> flags added by
+ // LiveIntervalAnalysis, so only run it when LiveVariables is available.
+ if (LV)
+ CoalesceExtSubRegs(RealSrcs, DstReg);
+ }
+
+ RegSequences.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..52693f03e828
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,215 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+ };
+}
+char UnreachableBlockElim::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
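+ // Zap PHI nodes first: their uses may live in other dead blocks, so give
+ // them null values before dropping references.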
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ if (PI) PI->removeBlock(DeadBlocks[i]);
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
+ }
+
+ if (phi->getNumOperands() == 3) {
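+ // Only one incoming (value, block) pair is left, so the PHI is a plain
+ // copy; merge the registers and delete it.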
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+ ModifiedPHI = true;
+
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (DeadBlocks.size() || ModifiedPHI);
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..7557979ec9bb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,371 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills , "Number of register spills");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ ReMatId = MAX_STACK_SLOT+1;
+ LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2ReMatIdMap.clear();
+ Virt2SplitMap.clear();
+ Virt2SplitKillMap.clear();
+ ReMatMap.clear();
+ ImplicitDefed.clear();
+ SpillSlotToUsesMap.clear();
+ MI2VirtMap.clear();
+ SpillPt2VirtMap.clear();
+ RestorePt2VirtMap.clear();
+ EmergencySpillMap.clear();
+ EmergencySpillSlots.clear();
+
+ SpillSlotToUsesMap.resize(8);
+ ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
+
+ allocatableRCRegs.clear();
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ allocatableRCRegs.insert(std::make_pair(*I,
+ TRI->getAllocatableSet(mf, *I)));
+
+ grow();
+
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2ReMatIdMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+ Virt2SplitKillMap.resize(NumRegs);
+ ReMatMap.resize(NumRegs);
+ ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+ unsigned Idx = SS-LowSpillSlot;
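+ // Grow the per-slot use map geometrically so indexing by SS - LowSpillSlot
+ // stays in bounds.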
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ return SS;
+}
+
+unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
+ unsigned physReg = Hint.second;
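+ // A hint type of 0 is a plain physreg preference (possibly reached through
+ // another virtreg's assignment); other hint types are target-specific and
+ // must be resolved by TRI.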
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ physReg = getPhys(physReg);
+ if (Hint.first == 0)
+ return (TargetRegisterInfo::isPhysicalRegister(physReg))
+ ? physReg : 0;
+ return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ ++NumSpills;
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+ std::map<const TargetRegisterClass*, int>::iterator I =
+ EmergencySpillSlots.find(RC);
+ if (I != EmergencySpillSlots.end())
+ return I->second;
+ return EmergencySpillSlots[RC] = createSpillSlot(RC);
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+ if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+ // If FI < LowSpillSlot, this stack reference was produced by
+ // instruction selection and is not a spill
+ if (FI >= LowSpillSlot) {
+ assert(FI >= 0 && "Spill slot index should not be negative!");
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+ }
+ }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ MachineInstr *NewMI, ModRef MRInfo) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+ MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+ continue;
+ // This stack reference was produced by instruction selection and
+ // is not a spill
+ if (FI < LowSpillSlot)
+ continue;
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+ }
+ MI2VirtMap.erase(MI);
+ SpillPt2VirtMap.erase(MI);
+ RestorePt2VirtMap.erase(MI);
+ EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
+ unsigned NumRegs = TRI->getNumRegs();
+ UnusedRegs.reset();
+ UnusedRegs.resize(NumRegs);
+
+ BitVector Used(NumRegs);
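+ // A physreg counts as used if any virtual register has been assigned to it.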
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[Reg]);
+ }
+
+ BitVector Allocatable = TRI->getAllocatableSet(*MF);
+ bool AnyUnused = false;
+ for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
+ if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
+ bool ReallyUnused = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ if (Used[*AS] || LIs->hasInterval(*AS)) {
+ ReallyUnused = false;
+ break;
+ }
+ }
+ if (ReallyUnused) {
+ AnyUnused = true;
+ UnusedRegs.set(Reg);
+ }
+ }
+ }
+
+ return AnyUnused;
+}
+
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getFunction()->getName() << '\n');
+ DEBUG(dump());
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
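+ // Per-instruction scratch lists: super-register flags that must be re-added
+ // after sub-register operands have been rewritten to physical registers.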
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+ MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = getPhys(VirtReg);
+ assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register.
+ if (MO.isUse()) {
+ if (MO.isKill() && !MO.isUndef())
+ SuperKills.push_back(PhysReg);
+ } else if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ ++NumIdCopies;
+ if (MI->getNumOperands() == 2) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ RemoveMachineInstrFromMaps(MI);
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ } else {
+ // Transform identity copy to a KILL to deal with subregisters.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "Identity copy: " << *MI);
+ }
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 000000000000..03abff356934
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,528 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+ class raw_ostream;
+ class SlotIndexes;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+ /// mapping. Each spilled virtual register that should be remat'd has an
+ /// entry in it which corresponds to the remat id.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+ /// Virt2SplitMap - This is a virtual register to split virtual register
+ /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+ /// Virt2SplitKillMap - This maps a split virtual register to its last use
+ /// (kill) index.
+ IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+ /// MI2VirtMap - This is MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
+
+ /// SpillPt2VirtMap - This records the virtual registers which should
+ /// be spilled right after the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+ SpillPt2VirtMap;
+
+ /// RestorePt2VirtMap - This records the virtual registers which should
+ /// be restored right before the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+ /// EmergencySpillMap - This records the physical registers that should
+ /// be spilled / restored around the MachineInstr since the register
+ /// allocator has run out of registers.
+ std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+ /// EmergencySpillSlots - This records emergency spill slots used to
+ /// spill physical registers when the register allocator runs out of
+ /// registers. Ideally only one stack slot is used per function per
+ /// register class.
+ std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+ /// ReMatId - Instead of assigning a stack slot to a to-be-rematerialized
+ /// virtual register, a unique id is assigned. This keeps track of
+ /// the highest id used so far. Note that this starts at (1<<18) to avoid
+ /// conflicts with stack slot numbers.
+ int ReMatId;
+
+ /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+ int LowSpillSlot, HighSpillSlot;
+
+ /// SpillSlotToUsesMap - Records uses for each register spill slot.
+ SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+ /// ImplicitDefed - One bit for each virtual register. If set it indicates
+ /// the register is implicitly defined.
+ BitVector ImplicitDefed;
+
+ /// UnusedRegs - A list of physical registers that have not been used.
+ BitVector UnusedRegs;
+
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT),
+ Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+ Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
+ ReMatId(MAX_STACK_SLOT+1),
+ LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunction &getMachineFunction() const {
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
+ return *MF;
+ }
+
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+ /// @brief clears the specified virtual register's physical
+ /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief returns the register allocation preference.
+ unsigned getRegAllocPref(unsigned virtReg);
+
+ /// @brief returns true if VirtReg is assigned to its preferred physreg.
+ bool hasPreferredPhys(unsigned VirtReg) {
+ return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+ }
+
+ /// @brief records virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) const {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot or rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+ getReMatId(virtReg) == NO_STACK_SLOT)
+ return true;
+ // Split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief returns the rematerialization id mapped to the specified virtual
+ /// register
+ int getReMatId(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2ReMatIdMap[virtReg];
+ }
+
+ /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ void assignVirtReMatId(unsigned virtReg, int id);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap[virtReg] != NULL;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+ return ReMatMap[virtReg];
+ }
+
+ /// @brief records that the specified virtual register will be
+ /// re-materialized and the original instruction which will be re-issued
+ /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+ /// @brief record the last use (kill) of a split virtual register.
+ void addKillPoint(unsigned virtReg, SlotIndex index) {
+ Virt2SplitKillMap[virtReg] = index;
+ }
+
+ SlotIndex getKillPoint(unsigned virtReg) const {
+ return Virt2SplitKillMap[virtReg];
+ }
+
+ /// @brief remove the last use (kill) of a split virtual register.
+ void removeKillPoint(unsigned virtReg) {
+ Virt2SplitKillMap[virtReg] = SlotIndex();
+ }
+
+ /// @brief returns true if the specified MachineInstr is a spill point.
+ bool isSpillPt(MachineInstr *Pt) const {
+ return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be spilled due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+ return SpillPt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a spill point for virtReg.
+ void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Pt);
+ if (I != SpillPt2VirtMap.end())
+ I->second.push_back(std::make_pair(virtReg, isKill));
+ else {
+ std::vector<std::pair<unsigned,bool> > Virts;
+ Virts.push_back(std::make_pair(virtReg, isKill));
+ SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer spill point information from one instruction to
+ /// another.
+ void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Old);
+ if (I == SpillPt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back().first;
+ bool isKill = I->second.back().second;
+ I->second.pop_back();
+ addSpillPoint(virtReg, isKill, New);
+ }
+ SpillPt2VirtMap.erase(I);
+ }
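+
+ // Usage sketch (hypothetical spiller code; MI and NewMI are placeholders
+ // for MachineInstr pointers):
+ //   VRM.addSpillPoint(VReg, /*isKill=*/true, MI);  // spill VReg after MI
+ //   if (VRM.isSpillPt(MI)) {
+ //     std::vector<std::pair<unsigned,bool> > &S = VRM.getSpillPtSpills(MI);
+ //     // each entry is (virtual register, isKill)
+ //   }
+ //   VRM.transferSpillPts(MI, NewMI);  // MI was replaced by NewMI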
+
+ /// @brief returns true if the specified MachineInstr is a restore point.
+ bool isRestorePt(MachineInstr *Pt) const {
+ return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be restored due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+ return RestorePt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a restore point for virtReg.
+ void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Pt);
+ if (I != RestorePt2VirtMap.end())
+ I->second.push_back(virtReg);
+ else {
+ std::vector<unsigned> Virts;
+ Virts.push_back(virtReg);
+ RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer restore point information from one instruction to
+ /// another.
+ void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Old);
+ if (I == RestorePt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addRestorePoint(virtReg, New);
+ }
+ RestorePt2VirtMap.erase(I);
+ }
+
+ /// @brief records that the specified physical register must be spilled
+ /// around the specified machine instr.
+ void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+ if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+ EmergencySpillMap[MI].push_back(PhysReg);
+ else {
+ std::vector<unsigned> PhysRegs;
+ PhysRegs.push_back(PhysReg);
+ EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+ }
+ }
+
+ /// @brief returns true if one or more physical registers must be spilled
+ /// around the specified instruction.
+ bool hasEmergencySpills(MachineInstr *MI) const {
+ return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+ }
+
+ /// @brief returns the physical registers to be spilled and restored around
+ /// the instruction.
+ std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+ return EmergencySpillMap[MI];
+ }
+
+ /// @brief - transfer emergency spill information from one instruction to
+ /// another.
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+ EmergencySpillMap.find(Old);
+ if (I == EmergencySpillMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addEmergencySpill(virtReg, New);
+ }
+ EmergencySpillMap.erase(I);
+ }
+
+ /// @brief return or get an emergency spill slot for the register class.
+ int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+ /// @brief Return lowest spill slot index.
+ int getLowSpillSlot() const {
+ return LowSpillSlot;
+ }
+
+ /// @brief Return highest spill slot index.
+ int getHighSpillSlot() const {
+ return HighSpillSlot;
+ }
+
+ /// @brief Records a spill slot use.
+ void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+ /// @brief Returns true if spill slot has been used.
+ bool isSpillSlotUsed(int FrameIndex) const {
+ assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+ return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+ }
+
+ /// @brief Mark the specified register as being implicitly defined.
+ void setIsImplicitlyDefined(unsigned VirtReg) {
+ ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+
+ /// @brief Returns true if the virtual register is implicitly defined.
+ bool isImplicitlyDefined(unsigned VirtReg) const {
+ return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
+ }
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into newMI machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+ ModRef MRInfo);
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into the specified machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+ /// @brief returns the virtual registers' values folded in memory
+ /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
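+
+ // Typical query pattern, mirroring the loop used by the local rewriter in
+ // VirtRegRewriter.cpp (MI is a MachineInstr pointer):
+ //   VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ //   for (tie(I, End) = VRM.getFoldedVirts(MI); I != End; ++I) {
+ //     unsigned VirtReg = I->second.first;        // folded virtual register
+ //     VirtRegMap::ModRef MR = I->second.second;  // how MI touches it
+ //   }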
+
+ /// RemoveMachineInstrFromMaps - MI is being erased; remove it from the
+ /// folded instruction map and spill point map.
+ void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+ /// FindUnusedRegisters - Gather a list of allocatable registers that
+ /// have not been allocated to any virtual register.
+ bool FindUnusedRegisters(LiveIntervals* LIs);
+
+ /// HasUnusedRegisters - Return true if there are any allocatable registers
+ /// that have not been allocated to any virtual register.
+ bool HasUnusedRegisters() const {
+ return !UnusedRegs.none();
+ }
+
+ /// setRegisterUsed - Remember the physical register is now used.
+ void setRegisterUsed(unsigned Reg) {
+ UnusedRegs.reset(Reg);
+ }
+
+ /// isRegisterUnused - Return true if the physical register has not been
+ /// used.
+ bool isRegisterUnused(unsigned Reg) const {
+ return UnusedRegs[Reg];
+ }
+
+ /// getFirstUnusedRegister - Return the first physical register that has not
+ /// been used.
+ unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+ int Reg = UnusedRegs.find_first();
+ while (Reg != -1) {
+ if (allocatableRCRegs[RC][Reg])
+ return (unsigned)Reg;
+ Reg = UnusedRegs.find_next(Reg);
+ }
+ return 0;
+ }
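+
+ // Usage sketch (hypothetical caller): after allocation, gather the
+ // allocatable registers that were never assigned and pick one as scratch:
+ //   VRM.FindUnusedRegisters(LIs);                 // populate UnusedRegs
+ //   if (VRM.HasUnusedRegisters()) {
+ //     unsigned Scratch = VRM.getFirstUnusedRegister(RC); // 0 if none in RC
+ //   }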
+
+ /// rewrite - Rewrite all instructions in MF to use only physical registers
+ /// by mapping all virtual register operands to their assigned physical
+ /// registers.
+ ///
+ /// @param Indexes Optionally remove deleted instructions from indexes.
+ void rewrite(SlotIndexes *Indexes);
+
+ void print(raw_ostream &OS, const Module* M = 0) const;
+ void dump() const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 000000000000..a5ec797b27db
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2633 @@
+//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDSS , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM , "Number of re-materializable defs elided");
+STATISTIC(NumStores , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats , "Number of re-materialization");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused , "Number of values reused");
+STATISTIC(NumDCE , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+ enum RewriterName { local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+ cl::desc("Rewriter to use (default=local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(local, "local rewriter"),
+ clEnumVal(trivial, "trivial rewriter"),
+ clEnumValEnd),
+ cl::init(local));
+
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+ cl::desc("Schedule spill code"),
+ cl::init(false));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+/// Note that operands may be added, so the MO reference is no longer valid.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ if (MO.getSubReg()) {
+ MO.substPhysReg(Reg, TRI);
+
+ // Any kill flags apply to the full virtual register, so they also apply to
+ // the full physical register.
+ // We assume that partial defs have already been decorated with a super-reg
+ // <imp-def> operand by LiveIntervals.
+ MachineInstr &MI = *MO.getParent();
+ if (MO.isUse() && !MO.isUndef() &&
+ (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+ } else {
+ MO.setReg(Reg);
+ }
+}
+
+namespace {
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct TrivialRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ DEBUG(dbgs() << "**** Machine Instrs"
+ << "(NOTE! Does not include spills and reloads!) ****\n");
+ DEBUG(MF.dump());
+
+ MachineRegisterInfo *mri = &MF.getRegInfo();
+ const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
+
+ bool changed = false;
+
+ for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = li->reg;
+
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ if (!li->empty())
+ mri->setPhysRegUsed(reg);
+ }
+ else {
+ if (!VRM.hasPhys(reg))
+ continue;
+ unsigned pReg = VRM.getPhys(reg);
+ mri->setPhysRegUsed(pReg);
+ // Copy the register use-list before traversing it.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
+ for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
+ E = mri->reg_end(); I != E; ++I)
+ reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+ for (unsigned N=0; N != reglist.size(); ++N)
+ substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
+ pReg, *tri);
+ changed |= !reglist.empty();
+ }
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.dump());
+
+ return changed;
+ }
+
+};
+
+}
+
+// ************************************************************************ //
+
+namespace {
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+ /// Other physregs are values that the register-allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+ /// this bit, and addAvailable sets it when its CanClobber argument is true.
+class AvailableSpills {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+ // or remat'ed virtual register values that are still available, due to
+ // being loaded or stored to, but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+ // indicating which stack slot values are currently held by a physreg. This
+ // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+ // physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+ : TRI(tri), TII(tii) {
+ }
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
+
+ const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+ /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+ /// available in a physical register, return that PhysReg, otherwise
+ /// return 0.
+ unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I =
+ SpillSlotsOrReMatsAvailable.find(Slot);
+ if (I != SpillSlotsOrReMatsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot / remat is available
+ /// in the specified physreg. If CanClobber is true, the physreg can be
+ /// modified at any time without changing the semantics of the program.
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlotOrReMat(SlotOrReMat);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+ SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+ (unsigned)CanClobber;
+
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Remembering RM#"
+ << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+ << (CanClobber ? " canclobber" : "") << "\n");
+ }
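+
+ // Worked example of the encoding used above: addAvailable(SS, /*Reg=*/5,
+ // /*CanClobber=*/true) stores (5 << 1) | 1 == 11 in
+ // SpillSlotsOrReMatsAvailable; getSpillSlotOrReMatPhysReg(SS) recovers
+ // 11 >> 1 == 5, and canClobberPhysRegForSS(SS) reads the low bit (11 & 1).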
+
+ /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+ /// the value of the specified stackslot register if it desires. The
+ /// specified stack slot must be available in a physreg for this query to
+ /// make sense.
+ bool canClobberPhysRegForSS(int SlotOrReMat) const {
+ assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+ "Value not available!");
+ return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+ /// physical register where values for some stack slot(s) might be
+ /// available.
+ bool canClobberPhysReg(unsigned PhysReg) const {
+ std::multimap<unsigned, int>::const_iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ if (!canClobberPhysRegForSS(SlotOrReMat))
+ return false;
+ }
+ return true;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff that lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+ /// slot changes. This removes information about which register the
+ /// previous value for this slot lives in (as the previous value is dead
+ /// now).
+ void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+ /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
+ /// other stack slots sharing the same register are no longer valid.
+ void ClobberSharingStackSlots(int StackSlot);
+
+ /// AddAvailableRegsToLiveIn - Availability information is being carried
+ /// into the specified MBB. Add available physical registers as potential
+ /// live-ins. If they are reused in the MBB, they will be added to the
+ /// live-in set so the register scavenger and post-allocation scheduler see
+ /// correct liveness.
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+
+}
+
+// ************************************************************************ //
+
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+ MachineBasicBlock::iterator const Begin,
+ unsigned PhysReg,
+ const TargetRegisterInfo *TRI,
+ bool DoReMat,
+ int SSorRMId,
+ const TargetInstrInfo *TII,
+ const MachineFunction &MF)
+{
+ if (!ScheduleSpills)
+ return InsertLoc;
+
+ // Spill backscheduling is of primary interest to addresses, so
+ // don't do anything if the register isn't in the register class
+ // used for pointers.
+
+ const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+ if (!TL->isTypeLegal(TL->getPointerTy()))
+ // Believe it or not, this is true on 16-bit targets like PIC16.
+ return InsertLoc;
+
+ const TargetRegisterClass *ptrRegClass =
+ TL->getRegClassFor(TL->getPointerTy());
+ if (!ptrRegClass->contains(PhysReg))
+ return InsertLoc;
+
+ // Scan upwards through the preceding instructions. If an instruction doesn't
+ // reference the stack slot or the register we're loading, we can
+ // backschedule the reload up past it.
+ MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+ while (NewInsertLoc != Begin) {
+ MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+ for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+ MachineOperand &Op = Prev->getOperand(i);
+ if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+ goto stop;
+ }
+ if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+ Prev->findRegisterDefOperand(PhysReg))
+ goto stop;
+ for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+ if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+ Prev->findRegisterDefOperand(*Alias))
+ goto stop;
+ NewInsertLoc = Prev;
+ }
+stop:;
+
+ // If we made it to the beginning of the block, turn around and move back
+ // down just past any existing reloads. They're likely to be reloads/remats
+ // for instructions earlier than what our current reload/remat is for, so
+ // they should be scheduled earlier.
+ if (NewInsertLoc == Begin) {
+ int FrameIdx;
+ while (InsertLoc != NewInsertLoc &&
+ (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+ TII->isTriviallyReMaterializable(NewInsertLoc)))
+ ++NewInsertLoc;
+ }
+
+ return NewInsertLoc;
+}
+
+namespace {
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+ unsigned StackSlotOrReMat;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+ AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+/// is reused instead of reloaded.
+class ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+public:
+ ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+ PhysRegsClobbered.resize(tri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
+ MachineFunction &MF, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+ /// rejected for the reload. This avoids infinite looping in cases like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+ /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+ return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+ }
+};
+
+}
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs){
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+ MachineOperand *KillOp = KillOps[Reg];
+ KillOp->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOp->getReg();
+ if (!RegKills[KReg])
+ return;
+
+ assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
+ "invalid superreg kill flags");
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ // If it's a def of a super-register, its sub-registers are no
+ // longer killed either.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+ assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
+ "invalid subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+ /// until it rewrites vreg2. At that point it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo* TRI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ return;
+ }
+ // No previous kill for this reg. Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+ ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ // This operand was the kill, now no longer.
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ assert(KillOps[*SR] == &MO && "bad subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ else {
+ // This operand may have reused a previously killed reg. Keep it live in
+ // case it continues to be used after erasing this instruction.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+ /// the def MI has other definition operands that are not dead, and reports
+ /// that via the HasLiveDef reference.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
+ continue;
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ // These do not affect kill info at all.
+ if (MI.isDebugValue())
+ return;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // This operand may have reused a previously killed reg. Keep it live.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ // It also defines (or partially define) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+ const MCInstrDesc &MCID = ReMatDefMI->getDesc();
+ assert(MCID.getNumDefs() == 1 &&
+ "Don't know how to remat instructions that define > 1 values!");
+#endif
+ TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys && "Virtual register is not assigned a register?");
+ substitutePhysReg(MO, Phys, *TRI);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find the SubReg's super-register of given register class
+/// where its SubIdx sub-register is SubReg.
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n");
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+ /// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ");
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+ else
+ DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+ /// AddAvailableRegsToLiveIn - Availability information is being carried
+ /// into the specified MBB. Add available physical registers as potential
+ /// live-ins. If they are reused in the MBB, they will be added to the
+ /// live-in set so the register scavenger and post-allocation scheduler see
+ /// correct liveness.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ std::set<unsigned> NotAvailable;
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = I->first;
+ const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
+ // FIXME: A temporary workaround. We can't reuse an available value if it's
+ // not safe to move the def of the virtual register's class, e.g. the
+ // X86::RFP* register classes. Do not add it as a live-in.
+ if (!TII->isSafeToMoveRegClassDefs(RC))
+ // This is no longer available.
+ NotAvailable.insert(Reg);
+ else {
+ MBB.addLiveIn(Reg);
+ if (RegKills[Reg])
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ }
+
+ // Skip over the same register.
+ std::multimap<unsigned, int>::iterator NI = llvm::next(I);
+ while (NI != E && NI->first == Reg) {
+ ++I;
+ ++NI;
+ }
+ }
+
+ for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+ E = NotAvailable.end(); I != E; ++I) {
+ ClobberPhysReg(*I);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+ *SubRegs; ++SubRegs)
+ ClobberPhysReg(*SubRegs);
+ }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsOrReMatsAvailable.erase(It);
+
+ // This register may hold the value of multiple stack slots, only remove this
+ // stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == SlotOrReMat) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(StackSlot);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+
+ // Erase entries in PhysRegsAvailable for other stack slots.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ while (I != PhysRegsAvailable.end() && I->first == Reg) {
+ std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
+ if (I->second != StackSlot) {
+ DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
+ << PrintReg(Reg, TRI) << '\n');
+ SpillSlotsOrReMatsAvailable.erase(I->second);
+ PhysRegsAvailable.erase(I);
+ }
+ I = NextI;
+ }
+}
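+
+// Scenario (sketch): if physreg R is currently recorded as holding the
+// values of both SS#1 and SS#2, then ClobberSharingStackSlots(SS#1) erases
+// the SS#2 entries from both maps and keeps the SS#1 <-> R association.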
+
+// ************************** //
+// Reuse Info Implementation //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
+ unsigned PhysReg,
+ MachineFunction &MF,
+ MachineInstr *MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0 &&
+ RC->contains(Op.AssignedPhysReg)) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ if (TRI->regsOverlap(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // MI may be using only a sub-register of PhysRegUsed.
+ unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
+ unsigned SubIdx = 0;
+ assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
+ "A reuse cannot be a virtual register");
+ if (PRRU != RealPhysRegUsed) {
+ // What was the sub-register index?
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
+ "Operand physreg is not a sub-register of PhysRegUsed");
+ }
+
+ // Ok, we're going to try to reload the assigned physreg into the
+ // slot that we were supposed to in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
+ MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
+ DoReMat, SSorRMId, TII, MF);
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
+ TRI, VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC, TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+
+ DEBUG(dbgs() << "Reuse undone!\n");
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+ // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM.hasPhys(Reg))
+ continue;
+ Reg = VRM.getPhys(Reg);
+ }
+ if (TRI->regsOverlap(PhysReg, Reg))
+ return false;
+ }
+ return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+ MachineBasicBlock &MBB,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ BitVector &AllocatableRegs) {
+ BitVector Defs(TRI->getNumRegs());
+ BitVector Uses(TRI->getNumRegs());
+ SmallVector<unsigned, 4> LocalUses;
+ SmallVector<unsigned, 4> Kills;
+
+ // Take a look at 2 instructions at most.
+ unsigned Count = 0;
+ while (Count < 2) {
+ if (MII == MBB.begin())
+ break;
+ MachineInstr *PrevMI = prior(MII);
+ MII = PrevMI;
+
+ if (PrevMI->isDebugValue())
+ continue; // Skip over dbg_value instructions.
+ ++Count;
+
+ for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = PrevMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.set(*AS);
+ } else {
+ LocalUses.push_back(Reg);
+ if (MO.isKill() && AllocatableRegs[Reg])
+ Kills.push_back(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned Kill = Kills[i];
+ if (!Defs[Kill] && !Uses[Kill] &&
+ RC->contains(Kill))
+ return Kill;
+ }
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.set(*AS);
+ }
+ }
+
+ return 0;
+}
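+
+// Sketch of what qualifies: a register that is marked <kill> in one of the
+// two preceding (non-debug) instructions, is allocatable, belongs to RC, and
+// is neither defined nor used again before MII is returned; otherwise 0.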
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == VirtReg)
+ substitutePhysReg(MO, PhysReg, TRI);
+ }
+}
+
+namespace {
+
+struct RefSorter {
+ bool operator()(const std::pair<MachineInstr*, int> &A,
+ const std::pair<MachineInstr*, int> &B) {
+ return A.second < B.second;
+ }
+};
+
+// ***************************** //
+// Local Spiller Implementation //
+// ***************************** //
+
+class LocalRewriter : public VirtRegRewriter {
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ VirtRegMap *VRM;
+ LiveIntervals *LIs;
+ BitVector AllocatableRegs;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+ DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
+
+ MachineBasicBlock *MBB; // Basic block currently being processed.
+
+public:
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs);
+
+private:
+ void EraseInstr(MachineInstr *MI) {
+ VRM->RemoveMachineInstrFromMaps(MI);
+ LIs->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI);
+
+ void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ void TransferDeadness(unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool InsertEmergencySpills(MachineInstr *MI);
+
+ bool InsertRestores(MachineInstr *MI,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+
+ bool InsertSpills(MachineInstr *MI);
+
+ void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps);
+
+ void RewriteMBB(LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+ LiveIntervals* lis) {
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ VRM = &vrm;
+ LIs = lis;
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n");
+ DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+ " reloads!) ****\n");
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // Keep track of kill information.
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SingleEntrySuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+ // If this MBB is the only predecessor of a successor, keep the
+ // availability information and visit it next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+ // FIXME: More than one successor, each of which has MBB as
+ // its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM->getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT) {
+ for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
+ SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
+ if (!VRM->isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
+ MachineInstr *DVMI = DbgValues[j];
+ DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
+ EraseInstr(DVMI);
+ }
+ ++NumDSS;
+ }
+ DbgValues.clear();
+ }
+ }
+ Slot2DbgValues.clear();
+
+ return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+/// xorq %r12<kill>, %r13
+/// addq %rax, -184(%rbp)
+/// addq %r13, -184(%rbp)
+/// ==>
+/// xorq %r12<kill>, %r13
+/// movq -184(%rbp), %r12
+/// addq %rax, %r12
+/// addq %r13, %r12
+/// movq %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
+ if (NextMII == MBB->end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+ // Now let's see if the last couple of instructions happens to have freed up
+ // a register.
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB->getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+ return false;
+
+ // If the next instruction also folds the same SS modref and can be unfolded,
+ // then it's worthwhile to issue a load from SS into the free register and
+ // then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+ return false;
+
+ // Back-schedule reloads and remats.
+ ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM->transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB->insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = llvm::next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+ llvm_unreachable("Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+ VRM->transferRestorePts(&NextMI, NewMIs[0]);
+ MBB->insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ EraseInstr(&NextMI);
+ ++NumModRefUnfold;
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
+ if (NextMII == MBB->end())
+ break;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM->addSpillSlotUse(SS, StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+/// xorl %edi, %eax
+/// movl %eax, -32(%ebp)
+/// movl -36(%ebp), %eax
+/// orl %eax, -32(%ebp)
+/// ==>
+/// xorl %edi, %eax
+/// orl -36(%ebp), %eax
+/// mov %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap can be updated, which would invalidate the iterator.
+ // Increment the iterator first.
+ ++I;
+ if (VRM->isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM->getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
+ RegKills, KillOps);
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+ continue;
+ if (VRM->isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM->isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM->getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ if (VRM->hasPhys(VirtReg)) {
+ PhysReg = VRM->getPhys(VirtReg);
+ if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+ }
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ MBB->insert(MII, NewMI);
+ NewMIs.clear();
+ int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+ assert(Idx != -1);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(Idx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
+ NewMI->eraseFromParent();
+ if (FoldedMI) {
+ VRM->addSpillSlotUse(SS, FoldedMI);
+ if (!VRM->hasPhys(UnfoldVR))
+ VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
+ VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = FoldedMI;
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// CommuteChangesDestination - We are looking for r0 = op r1, r2,
+/// where SrcReg is r1 and it is tied to r0. Return true if after
+/// commuting this instruction it will be r0 = op r2, r1.
+static bool CommuteChangesDestination(MachineInstr *DefMI,
+ const MCInstrDesc &MCID,
+ unsigned SrcReg,
+ const TargetInstrInfo *TII,
+ unsigned &DstIdx) {
+ if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
+ return false;
+ if (!DefMI->getOperand(1).isReg() ||
+ DefMI->getOperand(1).getReg() != SrcReg)
+ return false;
+ unsigned DefIdx;
+ if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+ return false;
+ unsigned SrcIdx1, SrcIdx2;
+ if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+ return false;
+ if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+ DstIdx = 2;
+ return true;
+ }
+ return false;
+}
+
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalRewriter::
+CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI) {
+ if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
+ return false;
+
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator DefMII = prior(MII);
+ MachineInstr *DefMI = DefMII;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (DefMII != MBB->begin() &&
+ MCID.isCommutable() &&
+ CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+ return false;
+ MachineInstr *ReloadMI = prior(DefMII);
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+ if (DestReg != SrcReg || FrameIdx != SS)
+ return false;
+ int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+ if (UseIdx == -1)
+ return false;
+ unsigned DefIdx;
+ if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+ return false;
+ assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ MBB->insert(MII, CommutedMI);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
+ // Not needed since foldMemoryOperand returns new MI.
+ CommutedMI->eraseFromParent();
+ if (!FoldedMI)
+ return false;
+
+ VRM->addSpillSlotUse(SS, FoldedMI);
+ VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM->addSpillSlotUse(SS, StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = FoldedMI; // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ EraseInstr(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ EraseInstr(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+
+ // If NewReg was previously holding value of some SS, it's now clobbered.
+ // This has to be done now because it's a physical register. When this
+ // instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+}
+
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+ TRI);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM->addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB->begin();
+ if (CheckDef)
+ --PrevMII;
+ EraseInstr(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side effects.
+ EraseInstr(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+ // the last of multiple instructions is the actual store.
+ LastStore = prior(oldNextMII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available,
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+}
+
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effect and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
+ MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
+ MI.isLabel() || MI.isDebugValue() ||
+ MI.hasUnmodeledSideEffects())
+ return false;
+
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI.isInlineAsm())
+ return false;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.isKill())
+ // FIXME: We can't remove kill markers or else the scavenger will assert.
+ // An alternative is to add a ADD pseudo instruction to replace kill
+ // markers.
+ return false;
+ }
+ return true;
+}
+
+/// TransferDeadness - An identity copy definition is dead and is being
+/// removed. Find the last def or use of the register and mark it dead / kill.
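+/// For illustration only (hypothetical registers): when a dead identity copy
+/// such as
+///   %r0 = COPY %r0
+/// is deleted, the most recent def or use of %r0 in this block inherits the
+/// marker: a side-effect-free def is deleted, another def is marked dead, and
+/// a use is marked kill.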
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark it dead and we are done.
+ if (!isSafeToDelete(*LastUDMI)) {
+ LastUD->setIsDead();
+ break;
+ }
+ EraseInstr(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+}
+
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+ if (!VRM->hasEmergencySpills(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM->getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ llvm_unreachable("Need to spill more than one physical registers!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
+ MachineInstr *StoreMI = prior(MII);
+ VRM->addSpillSlotUse(SS, StoreMI);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+ TII, *MBB->getParent());
+
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
+
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ return true;
+}
+
+/// InsertRestores - Restore registers before MI if requested by VRM. Return
+/// true if any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (!VRM->isRestorePt(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM->getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored is available. If so, it must be
+    // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << '\n');
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+ // the kill flags for the current instruction after processing it.
+
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId)
+ DEBUG(dbgs() << "Reusing RM#"
+ << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" by copying it into physreg "
+ << TRI->getName(Phys) << '\n');
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+ *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), Phys)
+ .addReg(InReg, RegState::Kill);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DEBUG(dbgs() << '\t' << *CopyMI);
+ ++NumCopified;
+ continue;
+ }
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+ *MBB->getParent());
+
+ if (VRM->isReMaterialized(VirtReg)) {
+ ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(MII));
+ }
+ return true;
+}
+
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
+/// true if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+ if (!VRM->isSpillPt(MI))
+ return false;
+ MachineBasicBlock::iterator MII = MI;
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM->getSpillPtSpills(MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM->getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ unsigned Phys = VRM->getPhys(VirtReg);
+ int StackSlot = VRM->getStackSlot(VirtReg);
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+ RC, TRI);
+ MachineInstr *StoreMI = prior(oldNextMII);
+ VRM->addSpillSlotUse(StackSlot, StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
+ VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ return true;
+}
+
+
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps) {
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MRI->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+      // If the virtual register is implicitly defined, emit an implicit_def
+      // before it so the scavenger knows it's "defined".
+      // FIXME: This is a horrible hack done by the register allocator to
+ // remat a definition with virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+
+ // A partial def causes problems because the same operand both reads and
+ // writes the register. This rewriter is designed to rewrite uses and defs
+ // separately, so a partial def would already have been rewritten to a
+ // physreg by the time we get to processing defs.
+ // Add an implicit use operand to model the partial def.
+ if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+ MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+ VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+ MI.addOperand(MachineOperand::CreateReg(VirtReg,
+ false, // isDef
+ true)); // isImplicit
+ DEBUG(dbgs() << "Partial redef: " << MI);
+ }
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ unsigned VirtReg = MI.getOperand(i).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
+ if (VRM->isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+ if (MI.getOperand(i).isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+ if (VRM->isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MI.getOperand(i).isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MI.getOperand(i).isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+    // Note, this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+    // The live range [12, 14) is not part of the r1024 live interval since
+    // it's defined by an implicit def. It will not conflict with the live
+    // interval of r1025. Now suppose both registers are spilled: you can
+    // easily see a situation where both registers are reloaded before
+    // the INSERT_SUBREG and both target registers would overlap.
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload && SubIdx) {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
+ if (MI.isInlineAsm()) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+ CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg) << ": " << MOk
+ << '\n');
+ break;
+ }
+ }
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
+ << " instead of reloading into "
+ << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to
+ // update the kill flags for the current instr after processing it.
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM->getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+          // Mark it isKill if there are no other uses of the same virtual
+          // register and it's not a two-address operand. IsKill will be
+          // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
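+      // For illustration only (hypothetical registers): if fi#1 is available
+      // in %r0 and the two-address instruction is 'r1 = add r1, r2' with r1
+      // reloaded from fi#1, we emit '%r1 = COPY %r0' rather than reusing %r0
+      // directly, so the value held in %r0 survives the add.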
+ unsigned DesignatedReg = VRM->getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.
+ GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+ MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ // If the mapped designated register is actually the physreg we have
+      // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg)
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ MRI->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ MRI->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+}
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them. If possible, avoid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+ << MBB->getName() << "':\n");
+
+ MachineFunction &MF = *MBB->getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
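+  // For illustration only (hypothetical slot): a 'store %r0 -> fi#3' followed
+  // later by another store to fi#3 with no intervening read of fi#3 makes the
+  // first store dead, and it is deleted.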
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Keep track of the registers we have already spilled in case there are
+ // multiple defs of the same register in MI.
+ SmallSet<unsigned, 8> SpilledMIRegs;
+
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ DistanceMap.clear();
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+ if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+ NextMII = llvm::next(MII);
+
+ if (InsertEmergencySpills(MII))
+ NextMII = llvm::next(MII);
+
+ InsertRestores(MII, Spills, RegKills, KillOps);
+
+ if (InsertSpills(MII))
+ NextMII = llvm::next(MII);
+
+ bool Erased = false;
+ bool BackTracked = false;
+ MachineInstr &MI = *MII;
+
+ // Remember DbgValue's which reference stack slots.
+ if (MI.isDebugValue() && MI.getOperand(0).isFI())
+ Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+
+ ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
+
+ DEBUG(dbgs() << '\t' << MI);
+
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register
+
+ // Copy the folded virts to a small vector, we may change MI2VirtMap.
+ SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+ // C++0x FTW!
+ for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+ VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+ VRM->getFoldedVirts(&MI);
+ FVRange.first != FVRange.second; ++FVRange.first)
+ FoldedVirts.push_back(FVRange.first->second);
+
+ SmallSet<int, 2> FoldedSS;
+ for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+ unsigned VirtReg = FoldedVirts[FVI].first;
+ VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
+ DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
+
+ int SS = VRM->getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+ if (DestReg != InReg) {
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DestReg, RegState::Define, DefMO->getSubReg())
+ .addReg(InReg, RegState::Kill);
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = CopyMI;
+ NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ BackTracked = true;
+ } else {
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ // InvalidateKills resurrects any prior kill of the copy's source
+ // allowing the source reg to be reused in place of the copy.
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
+ MBB->insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+ MBB->insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB->insert(MII, NewStore);
+ VRM->addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ if (!NewStore)
+ ++NumDSE;
+ }
+
+ MaybeDeadStores[SS] = NULL;
+ if (NewStore) {
+ // Treat this store as a spill merged into a copy. That makes the
+ // stack slot value available.
+ VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+ goto ProcessNextInst;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlotOrReMat(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+
+ if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
+ Spills, RegKills, KillOps, TRI)) {
+ NextMII = llvm::next(MII);
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register
+ // available in PhysReg.
+ Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ SpilledMIRegs.clear();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+ continue;
+
+ unsigned VirtReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef"; if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuse the scavenger. This is extremely rare.
+ if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
+ MI.getNumOperands() == 2) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
+ // Last def is now dead.
+ TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
+ }
+ EraseInstr(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ unsigned SubIdx = MO.getSubReg();
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM->getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM->getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+ }
+ }
+
+ // If StackSlot is available in a register that also holds other stack
+ // slots, clobber those stack slots now.
+ Spills.ClobberSharingStackSlots(StackSlot);
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ MRI->setPhysRegUsed(PhysReg);
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps);
+ NextMII = llvm::next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ if (MI.isIdentityCopy()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ ProcessNextInst:
+ // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ EraseInstr(&MI);
+ Erased = true;
+ }
+ if (!Erased)
+ DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+}
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+ switch (RewriterOpt) {
+ default: llvm_unreachable("Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.h b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 000000000000..93474e0d7ff7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,32 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineFunction;
+ class VirtRegMap;
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+  /// createVirtRegRewriter - Create and return a rewriter object, as specified
+ /// on the command line.
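+  /// A sketch of typical use (ownership is assumed to rest with the caller):
+  ///   VirtRegRewriter *RW = createVirtRegRewriter();
+  ///   RW->runOnMachineFunction(MF, VRM, LIs);
+  ///   delete RW;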
+ VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CompilerDriver/Action.cpp b/contrib/llvm/lib/CompilerDriver/Action.cpp
new file mode 100644
index 000000000000..a8d625c7ac04
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Action.cpp
@@ -0,0 +1,134 @@
+//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Action class - implementation and auxiliary functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Action.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/Main.h"
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TimeValue.h"
+
+#include <stdexcept>
+#include <string>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+extern const char* ProgramName;
+
+}
+
+namespace {
+
+ void PrintString (const std::string& str) {
+ errs() << str << ' ';
+ }
+
+ void PrintCommand (const std::string& Cmd, const StrVector& Args) {
+ errs() << Cmd << ' ';
+ std::for_each(Args.begin(), Args.end(), &PrintString);
+ errs() << '\n';
+ }
+
+ bool IsSegmentationFault (int returnCode) {
+#ifdef LLVM_ON_WIN32
+    return (returnCode >= 0xc0000000UL);
+#else
+ return (returnCode < 0);
+#endif
+ }
+
+ int ExecuteProgram (const std::string& name, const StrVector& args) {
+ sys::Path prog(name);
+
+ if (sys::path::is_relative(prog.str())) {
+ prog = PrependMainExecutablePath(name, ProgramName,
+ (void *)(intptr_t)&Main);
+
+ if (!prog.canExecute()) {
+ prog = sys::Program::FindProgramByName(name);
+ if (prog.isEmpty()) {
+ PrintError("Can't find program '" + name + "'");
+ return -1;
+ }
+ }
+ }
+ if (!prog.canExecute()) {
+ PrintError("Program '" + name + "' is not executable.");
+ return -1;
+ }
+
+ // Build the command line vector and the redirects array.
+ const sys::Path* redirects[3] = {0,0,0};
+ sys::Path stdout_redirect;
+
+ std::vector<const char*> argv;
+ argv.reserve((args.size()+2));
+ argv.push_back(name.c_str());
+
+ for (StrVector::const_iterator B = args.begin(), E = args.end();
+ B!=E; ++B) {
+ if (*B == ">") {
+ ++B;
+ stdout_redirect.set(*B);
+ redirects[1] = &stdout_redirect;
+ }
+ else {
+ argv.push_back((*B).c_str());
+ }
+ }
+ argv.push_back(0); // null terminate list.
+
+ // Invoke the program.
+ int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
+
+ if (IsSegmentationFault(ret)) {
+ errs() << "Segmentation fault: ";
+ PrintCommand(name, args);
+ }
+
+ return ret;
+ }
+}
+
+namespace llvmc {
+ void AppendToGlobalTimeLog (const std::string& cmd, double time);
+}
+
+int llvmc::Action::Execute () const {
+ if (DryRun || VerboseMode)
+ PrintCommand(Command_, Args_);
+
+ if (!DryRun) {
+ if (Time) {
+ sys::TimeValue now = sys::TimeValue::now();
+ int ret = ExecuteProgram(Command_, Args_);
+ sys::TimeValue now2 = sys::TimeValue::now();
+ now2 -= now;
+ double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
+ AppendToGlobalTimeLog(Command_, elapsed);
+
+ return ret;
+ }
+ else {
+ return ExecuteProgram(Command_, Args_);
+ }
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp b/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp
new file mode 100644
index 000000000000..38442038d738
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/BuiltinOptions.cpp
@@ -0,0 +1,61 @@
+//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+
+#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS
+#include "llvm/Support/PluginLoader.h"
+#endif
+
+namespace cl = llvm::cl;
+
+namespace llvmc {
+
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
+ cl::ZeroOrMore);
+cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
+ cl::value_desc("file"), cl::Prefix);
+cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
+ cl::value_desc("<directory>"), cl::Prefix);
+cl::list<std::string> Languages("x",
+ cl::desc("Specify the language of the following input files"),
+ cl::ZeroOrMore);
+
+cl::opt<bool> DryRun("dry-run",
+ cl::desc("Only pretend to run commands"));
+cl::opt<bool> Time("time", cl::desc("Time individual commands"));
+cl::opt<bool> VerboseMode("v",
+ cl::desc("Enable verbose mode"));
+
+cl::opt<bool> CheckGraph("check-graph",
+ cl::desc("Check the compilation graph for errors"),
+ cl::Hidden);
+cl::opt<bool> WriteGraph("write-graph",
+ cl::desc("Write compilation-graph.dot file"),
+ cl::Hidden);
+cl::opt<bool> ViewGraph("view-graph",
+ cl::desc("Show compilation graph in GhostView"),
+ cl::Hidden);
+
+cl::opt<SaveTempsEnum::Values> SaveTemps
+("save-temps", cl::desc("Keep temporary files"),
+ cl::init(SaveTempsEnum::Unset),
+ cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
+ "Save files in the directory specified with -o"),
+ clEnumValN(SaveTempsEnum::Cwd, "cwd",
+ "Use current working directory"),
+ clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"),
+ clEnumValEnd),
+ cl::ValueOptional);
+
+} // End namespace llvmc.
diff --git a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
new file mode 100644
index 000000000000..33c6566499b8
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
@@ -0,0 +1,655 @@
+//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Compilation graph - implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <queue>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+ const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
+ // Remove the '.'.
+ StringRef suf = sys::path::extension(File.str()).substr(1);
+ LanguageMap::const_iterator Lang =
+ this->find(suf.empty() ? "*empty*" : suf);
+ if (Lang == this->end()) {
+ PrintError("File '" + File.str() + "' has unknown suffix '"
+ + suf.str() + '\'');
+ return 0;
+ }
+ return &Lang->second;
+ }
+}
+
+namespace {
+
+ /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
+ template <class C>
+ const Edge* ChooseEdge(const C& EdgesContainer,
+ const InputLanguagesSet& InLangs,
+ const std::string& NodeName = "root") {
+ const Edge* MaxEdge = 0;
+ int MaxWeight = 0;
+ bool SingleMax = true;
+
+ // TODO: fix calculation of SingleMax.
+ for (typename C::const_iterator B = EdgesContainer.begin(),
+ E = EdgesContainer.end(); B != E; ++B) {
+ const Edge* e = B->getPtr();
+ int EW = e->Weight(InLangs);
+ if (EW < 0) {
+ // (error) invocation in TableGen -> we don't need to print an error
+ // message.
+ return 0;
+ }
+ if (EW > MaxWeight) {
+ MaxEdge = e;
+ MaxWeight = EW;
+ SingleMax = true;
+ } else if (EW == MaxWeight) {
+ SingleMax = false;
+ }
+ }
+
+ if (!SingleMax) {
+ PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ if (!MaxEdge) {
+ PrintError("Node " + NodeName + ": no maximal outward edge found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ return MaxEdge;
+ }
+
+}
+
+void Node::AddEdge(Edge* Edg) {
+ // If there already was an edge between two nodes, modify it instead
+ // of adding a new edge.
+ const std::string& ToolName = Edg->ToolName();
+ for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end();
+ B != E; ++B) {
+ if ((*B)->ToolName() == ToolName) {
+ llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B);
+ return;
+ }
+ }
+ OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg));
+}
+
+CompilationGraph::CompilationGraph() {
+ NodesMap["root"] = Node(this);
+}
+
+Node* CompilationGraph::getNode(const std::string& ToolName) {
+ nodes_map_type::iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph");
+ return 0;
+ }
+ return &I->second;
+}
+
+const Node* CompilationGraph::getNode(const std::string& ToolName) const {
+ nodes_map_type::const_iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph!");
+ return 0;
+ }
+ return &I->second;
+}
+
+// Find the tools list corresponding to the given language name.
+const CompilationGraph::tools_vector_type*
+CompilationGraph::getToolsVector(const std::string& LangName) const
+{
+ tools_map_type::const_iterator I = ToolsMap.find(LangName);
+ if (I == ToolsMap.end()) {
+ PrintError("No tool corresponding to the language " + LangName + " found");
+ return 0;
+ }
+ return &I->second;
+}
+
+void CompilationGraph::insertNode(Tool* V) {
+ if (NodesMap.count(V->Name()) == 0)
+ NodesMap[V->Name()] = Node(this, V);
+}
+
+int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+ Node* B = getNode(Edg->ToolName());
+ if (B == 0)
+ return 1;
+
+ if (A == "root") {
+ const char** InLangs = B->ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs)
+ ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
+ NodesMap["root"].AddEdge(Edg);
+ }
+ else {
+ Node* N = getNode(A);
+ if (N == 0)
+ return 1;
+
+ N->AddEdge(Edg);
+ }
+ // Increase the inward edge counter.
+ B->IncrInEdges();
+
+ return 0;
+}
+
+// Pass the input file through the chain until we reach a Join node or an
+// action that stops further compilation.
+int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+ const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) const {
+ sys::Path In = InFile;
+ const Node* CurNode = StartNode;
+
+ while(true) {
+ Tool* CurTool = CurNode->ToolPtr.getPtr();
+
+ if (CurTool->IsJoin()) {
+ JoinTool& JT = static_cast<JoinTool&>(*CurTool);
+ JT.AddToJoinList(In);
+ break;
+ }
+
+ Action CurAction;
+ if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
+
+ if (int ret = CurAction.Execute())
+ return ret;
+
+ if (CurAction.StopCompilation())
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ CurNode = getNode(Edg->ToolName());
+ if (CurNode == 0)
+ return 1;
+
+ In = CurAction.OutFile();
+ }
+
+ return 0;
+}
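+
+// As an illustration (tool names here are hypothetical; the real ones come
+// from the plugin's TableGen description), a single C input might flow
+// through the chain as
+//
+//   hello.c --[compiler]--> tmp.s --[assembler]--> tmp.o --> join node (linker)
+//
+// with each intermediate file placed in TempDir and the traversal stopping
+// once a Join node collects the file via AddToJoinList().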
+
+// Find the head of the toolchain corresponding to the given file.
+// Also, insert an input language into InLangs.
+const Node* CompilationGraph::
+FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
+ InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
+
+ // Determine the input language.
+ const std::string* InLang = (ForceLanguage ? ForceLanguage
+ : LangMap.GetLanguage(In));
+ if (InLang == 0)
+ return 0;
+ const std::string& InLanguage = *InLang;
+
+ // Add the current input language to the input language set.
+ InLangs.insert(InLanguage);
+
+ // Find the toolchain for the input language.
+ const tools_vector_type* pTV = getToolsVector(InLanguage);
+ if (pTV == 0)
+ return 0;
+
+ const tools_vector_type& TV = *pTV;
+ if (TV.empty()) {
+ PrintError("No toolchain corresponding to language "
+ + InLanguage + " found");
+ return 0;
+ }
+
+ const Edge* Edg = ChooseEdge(TV, InLangs);
+ if (Edg == 0)
+ return 0;
+
+ return getNode(Edg->ToolName());
+}
+
+// Helper function used by Build().
+// Traverses initial portions of the toolchains (up to the first Join node).
+// This function is also responsible for handling the -x option.
+int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+ // This is related to -x option handling.
+ cl::list<std::string>::const_iterator xIter = Languages.begin(),
+ xBegin = xIter, xEnd = Languages.end();
+ bool xEmpty = true;
+ const std::string* xLanguage = 0;
+ unsigned xPos = 0, xPosNext = 0, filePos = 0;
+
+ if (xIter != xEnd) {
+ xEmpty = false;
+ xPos = Languages.getPosition(xIter - xBegin);
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max()
+ : Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
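+
+ // As a worked example (file names and the "c++" language are illustrative):
+ //
+ //   llvmc a.c -x c++ b.ii c.ii -x none d.c
+ //
+ // leaves 'a.c' with its suffix-derived language, forces 'b.ii' and 'c.ii'
+ // to be treated as "c++", and lets 'd.c' fall back to suffix-based
+ // detection, since each -x occurrence applies to the input files that
+ // appear after it and before the next -x.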
+
+ // For each input file:
+ for (cl::list<std::string>::const_iterator B = InputFilenames.begin(),
+ CB = B, E = InputFilenames.end(); B != E; ++B) {
+ sys::Path In = sys::Path(*B);
+
+ // Code for handling the -x option.
+ // Output: std::string* xLanguage (can be NULL).
+ if (!xEmpty) {
+ filePos = InputFilenames.getPosition(B - CB);
+
+ if (xPos < filePos) {
+ if (filePos < xPosNext) {
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ else { // filePos >= xPosNext
+ // Skip xIters while filePos > xPosNext
+ while (filePos > xPosNext) {
+ ++xIter;
+ xPos = xPosNext;
+
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ if (xNext == xEnd)
+ xPosNext = std::numeric_limits<unsigned>::max();
+ else
+ xPosNext = Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ }
+ }
+ }
+
+ // Find the toolchain corresponding to this file.
+ const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+ if (N == 0)
+ return 1;
+ // Pass file through the chain starting at head.
+ if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
+ return ret;
+ }
+
+ return 0;
+}
+
+// Sort the nodes in topological order.
+int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+ std::queue<const Node*> Q;
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
+
+ while (!Q.empty()) {
+ const Node* A = Q.front();
+ Q.pop();
+ Out.push_back(A);
+ for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+
+ return 0;
+}
+
+namespace {
+ bool NotJoinNode(const Node* N) {
+ return N->ToolPtr ? !N->ToolPtr->IsJoin() : true;
+ }
+}
+
+// Call TopologicalSort and filter the resulting list to include
+// only Join nodes.
+int CompilationGraph::
+TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
+ std::vector<const Node*> TopSorted;
+ if (int ret = TopologicalSort(TopSorted))
+ return ret;
+ std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
+ std::back_inserter(Out), NotJoinNode);
+
+ return 0;
+}
+
+int CompilationGraph::Build (const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+ InputLanguagesSet InLangs;
+ bool WasSomeActionGenerated = !InputFilenames.empty();
+
+ // Traverse initial parts of the toolchains and fill in InLangs.
+ if (int ret = BuildInitial(InLangs, TempDir, LangMap))
+ return ret;
+
+ std::vector<const Node*> JTV;
+ if (int ret = TopologicalSortFilterJoinNodes(JTV))
+ return ret;
+
+ // For all join nodes in topological order:
+ for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
+ B != E; ++B) {
+
+ const Node* CurNode = *B;
+ JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+
+ // Are there any files in the join list?
+ if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
+ continue;
+
+ WasSomeActionGenerated = true;
+ Action CurAction;
+ if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
+
+ if (int ret = CurAction.Execute())
+ return ret;
+
+ if (CurAction.StopCompilation())
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ const Node* NextNode = getNode(Edg->ToolName());
+ if (NextNode == 0)
+ return 1;
+
+ if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+ InLangs, TempDir, LangMap)) {
+ return ret;
+ }
+ }
+
+ if (!WasSomeActionGenerated) {
+ PrintError("no input files");
+ return 1;
+ }
+
+ return 0;
+}
+
+int CompilationGraph::CheckLanguageNames() const {
+ int ret = 0;
+
+ // Check that names for output and input languages on all edges do match.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+
+ const Node & N1 = B->second;
+ if (N1.ToolPtr) {
+ for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
+ EB != EE; ++EB) {
+ const Node* N2 = this->getNode((*EB)->ToolName());
+ if (N2 == 0)
+ return 1;
+
+ if (!N2->ToolPtr) {
+ ++ret;
+ errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
+ << "' back to the root!\n\n";
+ continue;
+ }
+
+ const char** OutLangs = N1.ToolPtr->OutputLanguages();
+ const char** InLangs = N2->ToolPtr->InputLanguages();
+ bool eq = false;
+ const char* OutLang = 0;
+ for (;*OutLangs; ++OutLangs) {
+ OutLang = *OutLangs;
+ for (;*InLangs; ++InLangs) {
+ if (std::strcmp(OutLang, *InLangs) == 0) {
+ eq = true;
+ break;
+ }
+ }
+ }
+
+ if (!eq) {
+ ++ret;
+ errs() << "Error: Output->input language mismatch in the edge '"
+ << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
+ << "'!\n"
+ << "Expected one of { ";
+
+ InLangs = N2->ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs) {
+ errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
+ }
+
+ errs() << " }, but got '" << OutLang << "'!\n\n";
+ }
+
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckMultipleDefaultEdges() const {
+ int ret = 0;
+ InputLanguagesSet Dummy;
+
+ // For all nodes, just iterate over the outgoing edges and check if there is
+ // more than one edge with maximum weight.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+ const Node& N = B->second;
+ int MaxWeight = -1024;
+
+ // Ignore the root node.
+ if (!N.ToolPtr)
+ continue;
+
+ for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
+ EB != EE; ++EB) {
+ int EdgeWeight = (*EB)->Weight(Dummy);
+ if (EdgeWeight > MaxWeight) {
+ MaxWeight = EdgeWeight;
+ }
+ else if (EdgeWeight == MaxWeight) {
+ ++ret;
+ errs() << "Error: there are multiple maximal edges stemming from the '"
+ << N.ToolPtr->Name() << "' node!\n\n";
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckCycles() {
+ unsigned deleted = 0;
+ std::queue<Node*> Q;
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
+
+ // Try to delete all nodes that have no incoming edges, starting from the
+ // root. If there are any nodes left after this operation, then we have a
+ // cycle. This relies on '--check-graph' not performing the topological sort.
+ while (!Q.empty()) {
+ Node* A = Q.front();
+ Q.pop();
+ ++deleted;
+
+ for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+
+ if (deleted != NodesMap.size()) {
+ errs() << "Error: there are cycles in the compilation graph!\n"
+ << "Try inspecting the diagram produced by "
+ << "'llvmc --view-graph'.\n\n";
+ return 1;
+ }
+
+ return 0;
+}
+
+int CompilationGraph::Check () {
+ // We try to catch as many errors as we can in one go.
+ int errs = 0;
+ int ret = 0;
+
+ // Check that output/input language names match.
+ ret = this->CheckLanguageNames();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ // Check for multiple default edges.
+ ret = this->CheckMultipleDefaultEdges();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ // Check for cycles.
+ ret = this->CheckCycles();
+ if (ret < 0)
+ return 1;
+ errs += ret;
+
+ return errs;
+}
+
+// Code related to graph visualization.
+
+namespace {
+
+std::string SquashStrArray (const char** StrArr) {
+ std::string ret;
+
+ for (; *StrArr; ++StrArr) {
+ if (*(StrArr + 1)) {
+ ret += *StrArr;
+ ret += ", ";
+ }
+ else {
+ ret += *StrArr;
+ }
+ }
+
+ return ret;
+}
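+
+// For example, SquashStrArray applied to the NULL-terminated array
+// { "c", "c++", "objective-c", NULL } yields the string "c, c++, objective-c".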
+
+} // End anonymous namespace.
+
+namespace llvm {
+ template <>
+ struct DOTGraphTraits<llvmc::CompilationGraph*>
+ : public DefaultDOTGraphTraits
+ {
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ template<typename GraphType>
+ static std::string getNodeLabel(const Node* N, const GraphType&)
+ {
+ if (N->ToolPtr)
+ if (N->ToolPtr->IsJoin())
+ return N->Name() + "\n (join" +
+ (N->HasChildren() ? ")"
+ : std::string(": ") +
+ SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
+ else
+ return N->Name();
+ else
+ return "root";
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
+ if (N->ToolPtr) {
+ return SquashStrArray(N->ToolPtr->OutputLanguages());
+ }
+ else {
+ return SquashStrArray(I->ToolPtr->InputLanguages());
+ }
+ }
+ };
+
+} // End namespace llvm
+
+int CompilationGraph::writeGraph(const std::string& OutputFilename) {
+ std::string ErrorInfo;
+ raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
+
+ if (ErrorInfo.empty()) {
+ errs() << "Writing '"<< OutputFilename << "' file...";
+ llvm::WriteGraph(O, this);
+ errs() << "done.\n";
+ }
+ else {
+ PrintError("Error opening file '" + OutputFilename + "' for writing!");
+ return 1;
+ }
+
+ return 0;
+}
+
+void CompilationGraph::viewGraph() {
+ llvm::ViewGraph(this, "compilation-graph");
+}
diff --git a/contrib/llvm/lib/CompilerDriver/Main.cpp b/contrib/llvm/lib/CompilerDriver/Main.cpp
new file mode 100644
index 000000000000..7120027f7ce0
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Main.cpp
@@ -0,0 +1,146 @@
+//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvmc::Main function - driver entry point.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/AutoGenerated.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+
+#include <sstream>
+#include <string>
+
+namespace cl = llvm::cl;
+namespace sys = llvm::sys;
+using namespace llvmc;
+
+namespace {
+
+ std::stringstream* GlobalTimeLog;
+
+ /// GetTempDir - Get the temporary directory location. Returns non-zero value
+ /// on error.
+ int GetTempDir(sys::Path& tempDir) {
+ // The --temp-dir option.
+ if (!TempDirname.empty()) {
+ tempDir = TempDirname;
+ }
+ // GCC 4.5-style -save-temps handling.
+ else if (SaveTemps == SaveTempsEnum::Unset) {
+ tempDir = sys::Path::GetTemporaryDirectory();
+ return 0;
+ }
+ else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
+ tempDir = sys::path::parent_path(OutputFilename);
+ }
+ else {
+ // SaveTemps == Cwd --> use current dir (leave tempDir empty).
+ return 0;
+ }
+
+ bool Exists;
+ if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
+ std::string ErrMsg;
+ if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
+ PrintError(ErrMsg);
+ return 1;
+ }
+ }
+
+ return 0;
+ }
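+
+ // For instance (paths are illustrative): with no -save-temps the temporaries
+ // go to a fresh system temporary directory, '--temp-dir=mytmp' puts them in
+ // 'mytmp', '-save-temps=obj -o build/hello' puts them in 'build', and
+ // '-save-temps=cwd' leaves them in the current working directory.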
+
+ /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
+ /// non-zero value in case of error.
+ int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
+ int ret;
+ sys::Path tempDir;
+ bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
+
+ if (int ret = GetTempDir(tempDir))
+ return ret;
+
+ ret = graph.Build(tempDir, langMap);
+
+ if (toDelete)
+ tempDir.eraseFromDisk(true);
+
+ return ret;
+ }
+}
+
+namespace llvmc {
+
+// Used to implement -time option. External linkage is intentional.
+void AppendToGlobalTimeLog(const std::string& cmd, double time) {
+ *GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
+}
+
+// Sometimes user code wants to access the argv[0] value.
+const char* ProgramName;
+
+int Main(int argc, char** argv) {
+ int ret = 0;
+ LanguageMap langMap;
+ CompilationGraph graph;
+
+ ProgramName = argv[0];
+
+ cl::ParseCommandLineOptions
+ (argc, argv,
+ /* Overview = */ "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
+
+ if (int ret = autogenerated::RunInitialization(langMap, graph))
+ return ret;
+
+ if (CheckGraph) {
+ ret = graph.Check();
+ if (!ret)
+ llvm::errs() << "check-graph: no errors found.\n";
+
+ return ret;
+ }
+
+ if (ViewGraph) {
+ graph.viewGraph();
+ if (!WriteGraph)
+ return 0;
+ }
+
+ if (WriteGraph) {
+ const std::string& Out = (OutputFilename.empty()
+ ? std::string("compilation-graph.dot")
+ : OutputFilename);
+ return graph.writeGraph(Out);
+ }
+
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
+
+ ret = BuildTargets(graph, langMap);
+
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
+ }
+
+ return ret;
+}
+
+} // end namespace llvmc
diff --git a/contrib/llvm/lib/CompilerDriver/Tool.cpp b/contrib/llvm/lib/CompilerDriver/Tool.cpp
new file mode 100644
index 000000000000..876759aa72b0
--- /dev/null
+++ b/contrib/llvm/lib/CompilerDriver/Tool.cpp
@@ -0,0 +1,95 @@
+//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tool base class - implementation details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Tool.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Path.h"
+
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace {
+ sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
+ const std::string& Suffix) {
+ sys::Path Out;
+
+ // Make sure we don't end up with path names like '/file.o' if the
+ // TempDir is empty.
+ if (TempDir.empty()) {
+ Out.set(BaseName);
+ }
+ else {
+ Out = TempDir;
+ Out.appendComponent(BaseName);
+ }
+ Out.appendSuffix(Suffix);
+ // NOTE: makeUnique always *creates* a unique temporary file,
+ // which is good, since there will be no races. However, some
+ // tools do not like it when the output file already exists, so
+ // they need to be placated with -f or something like that.
+ Out.makeUnique(true, NULL);
+ return Out;
+ }
+}
+
+sys::Path Tool::OutFilename(const sys::Path& In,
+ const sys::Path& TempDir,
+ bool StopCompilation,
+ const char* OutputSuffix) const {
+ sys::Path Out;
+
+ if (StopCompilation) {
+ if (!OutputFilename.empty()) {
+ Out.set(OutputFilename);
+ }
+ else if (IsJoin()) {
+ Out.set("a");
+ Out.appendSuffix(OutputSuffix);
+ }
+ else {
+ Out.set(sys::path::stem(In.str()));
+ Out.appendSuffix(OutputSuffix);
+ }
+ }
+ else {
+ if (IsJoin())
+ Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
+ else
+ Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix);
+ }
+ return Out;
+}
+
+namespace {
+ template <class A, class B>
+ bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) {
+ return std::less<A>()(p1.first, p2.first);
+ }
+}
+
+StrVector Tool::SortArgs(ArgsVector& Args) const {
+ StrVector Out;
+
+ // HACK: this won't be needed once we migrate away from CommandLine.
+ std::stable_sort(Args.begin(), Args.end(),
+ &CompareFirst<unsigned, std::string>);
+ for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
+ Out.push_back(B->second);
+ }
+
+ return Out;
+}
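+
+// For example, an ArgsVector of (position, argument) pairs such as
+// { (5, "-o"), (6, "out"), (2, "-O2") } comes back as the StrVector
+// { "-O2", "-o", "out" }, i.e. the original command-line order is restored.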
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
new file mode 100644
index 000000000000..7652090af8f5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -0,0 +1,1130 @@
+//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interface used by the various execution engine
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cmath>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
+STATISTIC(NumGlobals , "Number of global vars initialized");
+
+ExecutionEngine *(*ExecutionEngine::JITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) = 0;
+ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
+ std::string *ErrorStr) = 0;
+
+ExecutionEngine::ExecutionEngine(Module *M)
+ : EEState(*this),
+ LazyFunctionCreator(0),
+ ExceptionTableRegister(0),
+ ExceptionTableDeregister(0) {
+ CompilingLazily = false;
+ GVCompilationDisabled = false;
+ SymbolSearchingDisabled = false;
+ Modules.push_back(M);
+ assert(M && "Module is null?");
+}
+
+ExecutionEngine::~ExecutionEngine() {
+ clearAllGlobalMappings();
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ delete Modules[i];
+}
+
+void ExecutionEngine::DeregisterAllTables() {
+ if (ExceptionTableDeregister) {
+ DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
+ DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
+ for (; it != ite; ++it)
+ ExceptionTableDeregister(it->second);
+ AllExceptionTables.clear();
+ }
+}
+
+namespace {
+/// \brief Helper class which uses a value handle to automatically delete the
+/// memory block when the GlobalVariable is destroyed.
+class GVMemoryBlock : public CallbackVH {
+ GVMemoryBlock(const GlobalVariable *GV)
+ : CallbackVH(const_cast<GlobalVariable*>(GV)) {}
+
+public:
+ /// \brief Returns the address the GlobalVariable should be written into. The
+ /// GVMemoryBlock object is allocated in the same block, immediately before
+ /// that address.
+ static char *Create(const GlobalVariable *GV, const TargetData& TD) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
+ void *RawMemory = ::operator new(
+ TargetData::RoundUpAlignment(sizeof(GVMemoryBlock),
+ TD.getPreferredAlignment(GV))
+ + GVSize);
+ new(RawMemory) GVMemoryBlock(GV);
+ return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock);
+ }
+
+ virtual void deleted() {
+ // We allocated with operator new and with some extra memory hanging off the
+ // end, so don't just delete this. I'm not sure if this is actually
+ // required.
+ this->~GVMemoryBlock();
+ ::operator delete(this);
+ }
+};
+} // anonymous namespace
+
+char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
+ return GVMemoryBlock::Create(GV, *getTargetData());
+}
+
+bool ExecutionEngine::removeModule(Module *M) {
+ for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
+ E = Modules.end(); I != E; ++I) {
+ Module *Found = *I;
+ if (Found == M) {
+ Modules.erase(I);
+ clearGlobalMappingsFromModule(M);
+ return true;
+ }
+ }
+ return false;
+}
+
+Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ if (Function *F = Modules[i]->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+
+void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
+ const GlobalValue *ToUnmap) {
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
+ void *OldVal;
+
+ // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
+ // GlobalAddressMap.
+ if (I == GlobalAddressMap.end())
+ OldVal = 0;
+ else {
+ OldVal = I->second;
+ GlobalAddressMap.erase(I);
+ }
+
+ GlobalAddressReverseMap.erase(OldVal);
+ return OldVal;
+}
+
+void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
+ << "\' to [" << Addr << "]\n";);
+ void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
+ assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+}
+
+void ExecutionEngine::clearAllGlobalMappings() {
+ MutexGuard locked(lock);
+
+ EEState.getGlobalAddressMap(locked).clear();
+ EEState.getGlobalAddressReverseMap(locked).clear();
+}
+
+void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
+ MutexGuard locked(lock);
+
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ EEState.RemoveMapping(locked, FI);
+ for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+ GI != GE; ++GI)
+ EEState.RemoveMapping(locked, GI);
+}
+
+void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy &Map =
+ EEState.getGlobalAddressMap(locked);
+
+ // Deleting from the mapping?
+ if (Addr == 0)
+ return EEState.RemoveMapping(locked, GV);
+
+ void *&CurVal = Map[GV];
+ void *OldVal = CurVal;
+
+ if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
+ EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too.
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ AssertingVH<const GlobalValue> &V =
+ EEState.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+ return OldVal;
+}
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+
+ ExecutionEngineState::GlobalAddressMapTy::iterator I =
+ EEState.getGlobalAddressMap(locked).find(GV);
+ return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
+}
+
+const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
+ MutexGuard locked(lock);
+
+ // If we haven't computed the reverse mapping yet, do so first.
+ if (EEState.getGlobalAddressReverseMap(locked).empty()) {
+ for (ExecutionEngineState::GlobalAddressMapTy::iterator
+ I = EEState.getGlobalAddressMap(locked).begin(),
+ E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
+ EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+ I->second, I->first));
+ }
+
+ std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
+ EEState.getGlobalAddressReverseMap(locked).find(Addr);
+ return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+}
+
+namespace {
+class ArgvArray {
+ char *Array;
+ std::vector<char*> Values;
+public:
+ ArgvArray() : Array(NULL) {}
+ ~ArgvArray() { clear(); }
+ void clear() {
+ delete[] Array;
+ Array = NULL;
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ delete[] Values[I];
+ }
+ Values.clear();
+ }
+ /// Turn a vector of strings into a nice argv style array of pointers to null
+ /// terminated strings.
+ void *reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv);
+};
+} // anonymous namespace
+void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv) {
+ clear(); // Free the old contents.
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ Array = new char[(InputArgv.size()+1)*PtrSize];
+
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
+ const Type *SBytePtr = Type::getInt8PtrTy(C);
+
+ for (unsigned i = 0; i != InputArgv.size(); ++i) {
+ unsigned Size = InputArgv[i].size()+1;
+ char *Dest = new char[Size];
+ Values.push_back(Dest);
+ DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
+
+ std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
+ Dest[Size-1] = 0;
+
+ // Endian safe: Array[i] = (PointerTy)Dest;
+ EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize),
+ SBytePtr);
+ }
+
+ // Null terminate it
+ EE->StoreValueToMemory(PTOGV(0),
+ (GenericValue*)(Array+InputArgv.size()*PtrSize),
+ SBytePtr);
+ return Array;
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+ bool isDtors) {
+ const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
+ GlobalVariable *GV = module->getNamedGlobal(Name);
+
+ // If this global has internal linkage, or if it has a use, then it must be
+ // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
+ // this is the case, don't execute any of the global ctors, __main will do
+ // it.
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+ // Should be an array of '{ i32, void ()* }' structs. The first value is
+ // the init priority, which we ignore.
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return;
+ ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+ Constant *FP = CS->getOperand(1);
+ if (FP->isNullValue())
+ continue; // Found a sentinel value, ignore.
+
+ // Strip off constant expression casts.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+
+ // Execute the ctor/dtor function!
+ if (Function *F = dyn_cast<Function>(FP))
+ runFunction(F, std::vector<GenericValue>());
+
+ // FIXME: It is marginally lame that we just do nothing here if we see an
+ // entry we don't recognize. It might not be unreasonable for the verifier
+ // to not even allow this and just assert here.
+ }
+}
+
+void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
+ // Execute global ctors/dtors for each module in the program.
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ runStaticConstructorsDestructors(Modules[i], isDtors);
+}
+
+#ifndef NDEBUG
+/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
+static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ for (unsigned i = 0; i < PtrSize; ++i)
+ if (*(i + (uint8_t*)Loc))
+ return false;
+ return true;
+}
+#endif
+
+int ExecutionEngine::runFunctionAsMain(Function *Fn,
+ const std::vector<std::string> &argv,
+ const char * const * envp) {
+ std::vector<GenericValue> GVArgs;
+ GenericValue GVArgc;
+ GVArgc.IntVal = APInt(32, argv.size());
+
+ // Check main() type
+ unsigned NumArgs = Fn->getFunctionType()->getNumParams();
+ const FunctionType *FTy = Fn->getFunctionType();
+ const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
+
+ // Check the argument types.
+ if (NumArgs > 3)
+ report_fatal_error("Invalid number of arguments of main() supplied");
+ if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty)
+ report_fatal_error("Invalid type for third argument of main() supplied");
+ if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty)
+ report_fatal_error("Invalid type for second argument of main() supplied");
+ if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32))
+ report_fatal_error("Invalid type for first argument of main() supplied");
+ if (!FTy->getReturnType()->isIntegerTy() &&
+ !FTy->getReturnType()->isVoidTy())
+ report_fatal_error("Invalid return type of main() supplied");
+
+ ArgvArray CArgv;
+ ArgvArray CEnv;
+ if (NumArgs) {
+ GVArgs.push_back(GVArgc); // Arg #0 = argc.
+ if (NumArgs > 1) {
+ // Arg #1 = argv.
+ GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv)));
+ assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
+ "argv[0] was null after CreateArgv");
+ if (NumArgs > 2) {
+ std::vector<std::string> EnvVars;
+ for (unsigned i = 0; envp[i]; ++i)
+ EnvVars.push_back(envp[i]);
+ // Arg #2 = envp.
+ GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars)));
+ }
+ }
+ }
+
+ return runFunction(Fn, GVArgs).IntVal.getZExtValue();
+}
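+
+// A minimal usage sketch (assumes an already-created ExecutionEngine *EE and a
+// Function *MainFn looked up from its module; error handling is elided):
+//
+//   std::vector<std::string> Args;
+//   Args.push_back("prog");              // becomes argv[0]
+//   const char *const Envp[] = { 0 };    // empty environment
+//   int Res = EE->runFunctionAsMain(MainFn, Args, Envp);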
+
+ExecutionEngine *ExecutionEngine::create(Module *M,
+ bool ForceInterpreter,
+ std::string *ErrorStr,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode) {
+ return EngineBuilder(M)
+ .setEngineKind(ForceInterpreter
+ ? EngineKind::Interpreter
+ : EngineKind::JIT)
+ .setErrorStr(ErrorStr)
+ .setOptLevel(OptLevel)
+ .setAllocateGVsWithCode(GVsWithCode)
+ .create();
+}
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine; it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
+ if (ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ return 0;
+ }
+
+ // Use the defaults for extra parameters. Users can use EngineBuilder to
+ // set them.
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+
+ TargetMachine *TM =
+ EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ TM->setCodeModel(CMM);
+
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+}
+
+ExecutionEngine *EngineBuilder::create() {
+ // Make sure we can resolve symbols in the program as well. The zero arg
+ // to the function tells DynamicLibrary to load the program, not a library.
+ if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+ return 0;
+
+ // If the user specified a memory manager but didn't specify which engine to
+ // create, we assume they only want the JIT, and we fail if they only want
+ // the interpreter.
+ if (JMM) {
+ if (WhichEngine & EngineKind::JIT)
+ WhichEngine = EngineKind::JIT;
+ else {
+ if (ErrorStr)
+ *ErrorStr = "Cannot create an interpreter with a memory manager.";
+ return 0;
+ }
+ }
+
+ // Unless the interpreter was explicitly selected or the JIT is not linked,
+ // try making a JIT.
+ if (WhichEngine & EngineKind::JIT) {
+ if (TargetMachine *TM =
+ EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr)) {
+ TM->setCodeModel(CMModel);
+
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode, TM);
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode, TM);
+ if (EE) return EE;
+ }
+ }
+ }
+
+ // If we can't make a JIT and we didn't request one specifically, try making
+ // an interpreter instead.
+ if (WhichEngine & EngineKind::Interpreter) {
+ if (ExecutionEngine::InterpCtor)
+ return ExecutionEngine::InterpCtor(M, ErrorStr);
+ if (ErrorStr)
+ *ErrorStr = "Interpreter has not been linked in.";
+ return 0;
+ }
+
+ if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
+ if (ErrorStr)
+ *ErrorStr = "JIT has not been linked in.";
+ }
+
+ return 0;
+}
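+
+// A minimal sketch of the intended builder-style use (assumes a Module *M that
+// the engine takes ownership of; only the error string is checked):
+//
+//   std::string Err;
+//   ExecutionEngine *EE = EngineBuilder(M)
+//                           .setEngineKind(EngineKind::JIT)
+//                           .setErrorStr(&Err)
+//                           .setOptLevel(CodeGenOpt::Default)
+//                           .create();
+//   if (EE == 0)
+//     errs() << "Could not create an execution engine: " << Err << "\n";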
+
+void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
+ if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
+ return getPointerToFunction(F);
+
+ MutexGuard locked(lock);
+ if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+ return P;
+
+ // Global variable might have been added since interpreter started.
+ if (GlobalVariable *GVar =
+ const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV)))
+ EmitGlobalVariable(GVar);
+ else
+ llvm_unreachable("Global hasn't had an address allocated yet!");
+
+ return EEState.getGlobalAddressMap(locked)[GV];
+}
+
+/// \brief Converts a Constant* into a GenericValue, including handling of
+/// ConstantExpr values.
+GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
+ // If it's undefined, just return garbage.
+ if (isa<UndefValue>(C)) {
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ // Although the value is undefined, we still have to construct an APInt
+ // with the correct bit width.
+ Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
+ break;
+ default:
+ break;
+ }
+ return Result;
+ }
+
+ // Otherwise, if the value is a ConstantExpr...
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *Op0 = CE->getOperand(0);
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Compute the index
+ GenericValue Result = getConstantValue(Op0);
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ uint64_t Offset =
+ TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size());
+
+ char* tmp = (char*) Result.PointerVal;
+ Result = PTOGV(tmp + Offset);
+ return Result;
+ }
+ case Instruction::Trunc: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.trunc(BitWidth);
+ return GV;
+ }
+ case Instruction::ZExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.zext(BitWidth);
+ return GV;
+ }
+ case Instruction::SExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.sext(BitWidth);
+ return GV;
+ }
+ case Instruction::FPTrunc: {
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.FloatVal = float(GV.DoubleVal);
+ return GV;
+ }
+ case Instruction::FPExt:{
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.DoubleVal = double(GV.FloatVal);
+ return GV;
+ }
+ case Instruction::UIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.roundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.roundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ false,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::SIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType()->isFloatTy())
+ GV.FloatVal = float(GV.IntVal.signedRoundToDouble());
+ else if (CE->getType()->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.signedRoundToDouble();
+ else if (CE->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
+ true,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::FPToUI: // double->APInt conversion handles sign
+ case Instruction::FPToSI: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ if (Op0->getType()->isFloatTy())
+ GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth);
+ else if (Op0->getType()->isDoubleTy())
+ GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
+ else if (Op0->getType()->isX86_FP80Ty()) {
+ APFloat apf = APFloat(GV.IntVal);
+ uint64_t v;
+ bool ignored;
+ (void)apf.convertToInteger(&v, BitWidth,
+ CE->getOpcode()==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ GV.IntVal = v; // endian?
+ }
+ return GV;
+ }
+ case Instruction::PtrToInt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
+ return GV;
+ }
+ case Instruction::IntToPtr: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ if (PtrWidth != GV.IntVal.getBitWidth())
+ GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
+ assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
+ GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue()));
+ return GV;
+ }
+ case Instruction::BitCast: {
+ GenericValue GV = getConstantValue(Op0);
+ const Type* DestTy = CE->getType();
+ switch (Op0->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid bitcast operand");
+ case Type::IntegerTyID:
+ assert(DestTy->isFloatingPointTy() && "invalid bitcast");
+ if (DestTy->isFloatTy())
+ GV.FloatVal = GV.IntVal.bitsToFloat();
+ else if (DestTy->isDoubleTy())
+ GV.DoubleVal = GV.IntVal.bitsToDouble();
+ break;
+ case Type::FloatTyID:
+ assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
+ GV.IntVal = APInt::floatToBits(GV.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
+ GV.IntVal = APInt::doubleToBits(GV.DoubleVal);
+ break;
+ case Type::PointerTyID:
+ assert(DestTy->isPointerTy() && "Invalid bitcast");
+ break; // getConstantValue(Op0) above already converted it
+ }
+ return GV;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ GenericValue LHS = getConstantValue(Op0);
+ GenericValue RHS = getConstantValue(CE->getOperand(1));
+ GenericValue GV;
+ switch (CE->getOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Bad add type!");
+ case Type::IntegerTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid integer opcode");
+ case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break;
+ case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break;
+ case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break;
+ case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break;
+ case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break;
+ case Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break;
+ case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break;
+ case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break;
+ case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break;
+ case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break;
+ }
+ break;
+ case Type::FloatTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid float opcode");
+ case Instruction::FAdd:
+ GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
+ case Instruction::FSub:
+ GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
+ case Instruction::FMul:
+ GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
+ case Instruction::FDiv:
+ GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
+ case Instruction::FRem:
+ GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
+ }
+ break;
+ case Type::DoubleTyID:
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid double opcode");
+ case Instruction::FAdd:
+ GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
+ case Instruction::FSub:
+ GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
+ case Instruction::FMul:
+ GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
+ case Instruction::FDiv:
+ GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
+ case Instruction::FRem:
+ GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+ }
+ break;
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ APFloat apfLHS = APFloat(LHS.IntVal);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid long double opcode");
+ case Instruction::FAdd:
+ apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FSub:
+ apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FMul:
+ apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FDiv:
+ apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FRem:
+ apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ }
+ }
+ break;
+ }
+ return GV;
+ }
+ default:
+ break;
+ }
+
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ConstantExpr not handled: " << *CE;
+ report_fatal_error(OS.str());
+ }
+
+ // Otherwise, we have a simple constant.
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ Result.IntVal = cast <ConstantFP>(C)->getValueAPF().bitcastToAPInt();
+ break;
+ case Type::IntegerTyID:
+ Result.IntVal = cast<ConstantInt>(C)->getValue();
+ break;
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(C))
+ Result.PointerVal = 0;
+ else if (const Function *F = dyn_cast<Function>(C))
+ Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
+ else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
+ else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ Result = PTOGV(getPointerToBasicBlock(const_cast<BasicBlock*>(
+ BA->getBasicBlock())));
+ else
+ llvm_unreachable("Unknown constant pointer type!");
+ break;
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ERROR: Constant unimplemented for type: " << *C->getType();
+ report_fatal_error(OS.str());
+ }
+
+ return Result;
+}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ uint8_t *Src = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost()) {
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ } else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
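+
+// As a worked example: storing the 80-bit value 0x0123456789ABCDEF0011 with
+// StoreBytes == 10 writes the bytes 11 00 EF CD AB 89 67 45 23 01 on a
+// little-endian host (a straight copy of the first ten bytes of the word
+// array), and 01 23 45 67 89 AB CD EF 00 11 on a big-endian host (word order
+// reversed, bytes within each word kept).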
+
+void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
+ GenericValue *Ptr, const Type *Ty) {
+ const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
+ break;
+ case Type::FloatTyID:
+ *((float*)Ptr) = Val.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ *((double*)Ptr) = Val.DoubleVal;
+ break;
+ case Type::X86_FP80TyID:
+ memcpy(Ptr, Val.IntVal.getRawData(), 10);
+ break;
+ case Type::PointerTyID:
+ // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
+ if (StoreBytes != sizeof(PointerTy))
+ memset(&(Ptr->PointerVal), 0, StoreBytes);
+
+ *((PointerTy*)Ptr) = Val.PointerVal;
+ break;
+ default:
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ }
+
+ if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian())
+ // Host and target are different endian - reverse the stored bytes.
+ std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost())
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}
+
+/// FIXME: document
+///
+void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
+ GenericValue *Ptr,
+ const Type *Ty) {
+ const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ // An APInt with all words initially zero.
+ Result.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0);
+ LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes);
+ break;
+ case Type::FloatTyID:
+ Result.FloatVal = *((float*)Ptr);
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = *((double*)Ptr);
+ break;
+ case Type::PointerTyID:
+ Result.PointerVal = *((PointerTy*)Ptr);
+ break;
+ case Type::X86_FP80TyID: {
+ // This is endian dependent, but it will only work on x86 anyway.
+ // FIXME: Will not trap if loading a signaling NaN.
+ uint64_t y[2];
+ memcpy(y, Ptr, 10);
+ Result.IntVal = APInt(80, 2, y);
+ break;
+ }
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "Cannot load value of type " << *Ty << "!";
+ report_fatal_error(OS.str());
+ }
+}
+
+void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
+ DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
+ DEBUG(Init->dump());
+ if (isa<UndefValue>(Init)) {
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
+ return;
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ const StructLayout *SL =
+ getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
+ return;
+ } else if (Init->getType()->isFirstClassType()) {
+ GenericValue Val = getConstantValue(Init);
+ StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
+ return;
+ }
+
+ DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n");
+ llvm_unreachable("Unknown constant type to initialize memory with!");
+}
+
+/// EmitGlobals - Emit all of the global variables to memory, storing their
+/// addresses into GlobalAddress. This must make sure to copy the contents of
+/// their initializers into the memory.
+void ExecutionEngine::emitGlobals() {
+ // Loop over all of the global variables in the program, allocating the memory
+ // to hold them. If there is more than one module, do a prepass over globals
+ // to figure out how the different modules should link together.
+ std::map<std::pair<std::string, const Type*>,
+ const GlobalValue*> LinkedGlobalsMap;
+
+ if (Modules.size() != 1) {
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ const GlobalValue *GV = I;
+ if (GV->hasLocalLinkage() || GV->isDeclaration() ||
+ GV->hasAppendingLinkage() || !GV->hasName())
+ continue;// Ignore external globals and globals with internal linkage.
+
+ const GlobalValue *&GVEntry =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+
+ // If this is the first time we've seen this global, it is the canonical
+ // version.
+ if (!GVEntry) {
+ GVEntry = GV;
+ continue;
+ }
+
+ // If the existing global is strong, never replace it.
+ if (GVEntry->hasExternalLinkage() ||
+ GVEntry->hasDLLImportLinkage() ||
+ GVEntry->hasDLLExportLinkage())
+ continue;
+
+ // Otherwise, we know it's linkonce/weak, replace it if this is a strong
+ // symbol. FIXME is this right for common?
+ if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
+ GVEntry = GV;
+ }
+ }
+ }
+
+ std::vector<const GlobalValue*> NonCanonicalGlobals;
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m];
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ // In the multi-module case, see what this global maps to.
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
+ // If something else is the canonical global, ignore this one.
+ if (GVEntry != &*I) {
+ NonCanonicalGlobals.push_back(I);
+ continue;
+ }
+ }
+ }
+
+ if (!I->isDeclaration()) {
+ addGlobalMapping(I, getMemoryForGV(I));
+ } else {
+ // External variable reference. Try to use the dynamic loader to
+ // get a pointer to it.
+ if (void *SymAddr =
+ sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
+ addGlobalMapping(I, SymAddr);
+ else {
+ report_fatal_error("Could not resolve external global address: "
+ +I->getName());
+ }
+ }
+ }
+
+ // If there are multiple modules, map the non-canonical globals to their
+ // canonical location.
+ if (!NonCanonicalGlobals.empty()) {
+ for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
+ const GlobalValue *GV = NonCanonicalGlobals[i];
+ const GlobalValue *CGV =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+ void *Ptr = getPointerToGlobalIfAvailable(CGV);
+ assert(Ptr && "Canonical global wasn't codegen'd!");
+ addGlobalMapping(GV, Ptr);
+ }
+ }
+
+ // Now that all of the globals are set up in memory, loop through them all
+ // and initialize their contents.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
+ if (GVEntry != &*I) // Not the canonical variable.
+ continue;
+ }
+ EmitGlobalVariable(I);
+ }
+ }
+ }
+}
+
+// EmitGlobalVariable - This method emits the specified global variable to the
+// address specified in GlobalAddresses, or allocates new memory if it's not
+// already in the map.
+void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
+ void *GA = getPointerToGlobalIfAvailable(GV);
+
+ if (GA == 0) {
+ // If it's not already specified, allocate memory for the global.
+ GA = getMemoryForGV(GV);
+ addGlobalMapping(GV, GA);
+ }
+
+ // Don't initialize if it's thread local, let the client do it.
+ if (!GV->isThreadLocal())
+ InitializeMemory(GV->getInitializer(), GA);
+
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ NumInitBytes += (unsigned)GVSize;
+ ++NumGlobals;
+}
+
+ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
+ : EE(EE), GlobalAddressMap(this) {
+}
+
+sys::Mutex *
+ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
+ return &EES->EE.lock;
+}
+
+void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
+ const GlobalValue *Old) {
+ void *OldVal = EES->GlobalAddressMap.lookup(Old);
+ EES->GlobalAddressReverseMap.erase(OldVal);
+}
+
+void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
+ const GlobalValue *,
+ const GlobalValue *) {
+ assert(false && "The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
new file mode 100644
index 000000000000..f8f1f4a78ee5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -0,0 +1,254 @@
+//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings for the ExecutionEngine library.
+//
+//===----------------------------------------------------------------------===//
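+//
+// A minimal usage sketch of these bindings (illustrative only; it assumes a
+// module Mod that contains an externally visible function "sum" of type
+// i32 (i32, i32)):
+//
+//   LLVMExecutionEngineRef EE;
+//   char *Err = 0;
+//   if (LLVMCreateJITCompilerForModule(&EE, Mod, 2, &Err))
+//     return 1;                                  // Err holds the message.
+//   LLVMValueRef Fn;
+//   if (!LLVMFindFunction(EE, "sum", &Fn)) {     // 0 means "found".
+//     LLVMGenericValueRef Args[2] = {
+//       LLVMCreateGenericValueOfInt(LLVMInt32Type(), 2, 0),
+//       LLVMCreateGenericValueOfInt(LLVMInt32Type(), 3, 0)
+//     };
+//     LLVMGenericValueRef Res = LLVMRunFunction(EE, Fn, 2, Args);
+//     printf("%llu\n", LLVMGenericValueToInt(Res, 0));
+//   }
+//   LLVMDisposeExecutionEngine(EE);
+//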
+
+#define DEBUG_TYPE "jit"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+
+using namespace llvm;
+
+/*===-- Operations on generic values --------------------------------------===*/
+
+LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
+ unsigned long long N,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned);
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->PointerVal = P;
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) {
+ GenericValue *GenVal = new GenericValue();
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ GenVal->FloatVal = N;
+ break;
+ case Type::DoubleTyID:
+ GenVal->DoubleVal = N;
+ break;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ }
+ return wrap(GenVal);
+}
+
+unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) {
+ return unwrap(GenValRef)->IntVal.getBitWidth();
+}
+
+unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef,
+ LLVMBool IsSigned) {
+ GenericValue *GenVal = unwrap(GenValRef);
+ if (IsSigned)
+ return GenVal->IntVal.getSExtValue();
+ else
+ return GenVal->IntVal.getZExtValue();
+}
+
+void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) {
+ return unwrap(GenVal)->PointerVal;
+}
+
+double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ return unwrap(GenVal)->FloatVal;
+ case Type::DoubleTyID:
+ return unwrap(GenVal)->DoubleVal;
+ default:
+ llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
+ break;
+ }
+ return 0; // Not reached
+}
+
+void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
+ delete unwrap(GenVal);
+}
+
+/*===-- Operations on execution engines -----------------------------------===*/
+
+LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Either)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *EE = builder.create()){
+ *OutEE = wrap(EE);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleRef M,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::Interpreter)
+ .setErrorStr(&Error);
+ if (ExecutionEngine *Interp = builder.create()) {
+ *OutInterp = wrap(Interp);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleRef M,
+ unsigned OptLevel,
+ char **OutError) {
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setOptLevel((CodeGenOpt::Level)OptLevel);
+ if (ExecutionEngine *JIT = builder.create()) {
+ *OutJIT = wrap(JIT);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateExecutionEngineForModule(OutEE,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateInterpreterForModule(OutInterp,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateJITCompilerForModule(OutJIT,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OptLevel, OutError);
+}
+
+
+void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
+ delete unwrap(EE);
+}
+
+void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(false);
+}
+
+void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(true);
+}
+
+int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned ArgC, const char * const *ArgV,
+ const char * const *EnvP) {
+ std::vector<std::string> ArgVec;
+ for (unsigned I = 0; I != ArgC; ++I)
+ ArgVec.push_back(ArgV[I]);
+
+ return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP);
+}
+
+LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned NumArgs,
+ LLVMGenericValueRef *Args) {
+ std::vector<GenericValue> ArgVec;
+ ArgVec.reserve(NumArgs);
+ for (unsigned I = 0; I != NumArgs; ++I)
+ ArgVec.push_back(*unwrap(Args[I]));
+
+ GenericValue *Result = new GenericValue();
+ *Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
+ return wrap(Result);
+}
+
+void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
+ unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
+}
+
+void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
+ unwrap(EE)->addModule(unwrap(M));
+}
+
+void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
+ /* The module provider is now actually a module. */
+ LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
+}
+
+LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
+ LLVMModuleRef *OutMod, char **OutError) {
+ Module *Mod = unwrap(M);
+ unwrap(EE)->removeModule(Mod);
+ *OutMod = wrap(Mod);
+ return 0;
+}
+
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
+ OutError);
+}
+
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn) {
+ if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
+ *OutFn = wrap(F);
+ return 0;
+ }
+ return 1;
+}
+
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+ return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+}
+
+LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
+ return wrap(unwrap(EE)->getTargetData());
+}
+
+void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+ void* Addr) {
+ unwrap(EE)->addGlobalMapping(unwrap<GlobalValue>(Global), Addr);
+}
+
+void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+ return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
new file mode 100644
index 000000000000..498063bf6555
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -0,0 +1,1350 @@
+//===-- Execution.cpp - Implement code to simulate the program ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the actual instruction interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "interpreter"
+#include "Interpreter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
+
+static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
+ cl::desc("make the interpreter print every volatile load and store"));
+
+//===----------------------------------------------------------------------===//
+// Various Helper Functions
+//===----------------------------------------------------------------------===//
+
+static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
+ SF.Values[V] = Val;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
+ break
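+
+// For instance, IMPLEMENT_BINARY_OPERATOR(+, Float) expands (at its use site)
+// to
+//
+//   case Type::FloatTyID:
+//     Dest.FloatVal = Src1.FloatVal + Src2.FloatVal;
+//     break;
+//
+// so the executeF*Inst helpers below mostly reduce to the corresponding host
+// C++ operator applied to the right GenericValue field.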
+
+static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(+, Float);
+ IMPLEMENT_BINARY_OPERATOR(+, Double);
+ default:
+ dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(-, Float);
+ IMPLEMENT_BINARY_OPERATOR(-, Double);
+ default:
+ dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(*, Float);
+ IMPLEMENT_BINARY_OPERATOR(*, Double);
+ default:
+ dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(/, Float);
+ IMPLEMENT_BINARY_OPERATOR(/, Double);
+ default:
+ dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
+ break;
+ default:
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+}
+
+#define IMPLEMENT_INTEGER_ICMP(OP, TY) \
+ case Type::IntegerTyID: \
+ Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
+ break;
+
+// Handle pointers specially because they must be compared with only as much
+// width as the host has. We _do not_ want to be comparing 64 bit values when
+// running on a 32-bit target, otherwise the upper 32 bits might mess up
+// comparisons if they contain garbage.
+#define IMPLEMENT_POINTER_ICMP(OP) \
+ case Type::PointerTyID: \
+ Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \
+ (void*)(intptr_t)Src2.PointerVal); \
+ break;
+
+static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_POINTER_ICMP(==);
+ default:
+ dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_POINTER_ICMP(!=);
+ default:
+ dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+void Interpreter::visitICmpInst(ICmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+#define IMPLEMENT_FCMP(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
+ break
+
+static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(==, Float);
+ IMPLEMENT_FCMP(==, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(!=, Float);
+ IMPLEMENT_FCMP(!=, Double);
+
+ default:
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<=, Float);
+ IMPLEMENT_FCMP(<=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>=, Float);
+ IMPLEMENT_FCMP(>=, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<, Float);
+ IMPLEMENT_FCMP(<, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>, Float);
+ IMPLEMENT_FCMP(>, Double);
+ default:
+ dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ return Dest;
+}
+
+#define IMPLEMENT_UNORDERED(TY, X,Y) \
+ if (TY->isFloatTy()) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ } \
+ } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ }
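+// The macro relies on NaN comparing unequal to itself: if either operand is
+// NaN, e.g. X.FloatVal != X.FloatVal evaluates to true and the unordered
+// predicate is satisfied immediately.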
+
+
+static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OEQ(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_ONE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Src2.FloatVal == Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Src2.DoubleVal == Src2.DoubleVal));
+ return Dest;
+}
+
+static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty->isFloatTy())
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Src2.FloatVal != Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Src2.DoubleVal != Src2.DoubleVal));
+ return Dest;
+}
+
+void Interpreter::visitFCmpInst(FCmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
+ case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
+ default:
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ GenericValue Result;
+ switch (predicate) {
+ case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty);
+ case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_FALSE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, false);
+ return Result;
+ }
+ case FCmpInst::FCMP_TRUE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, true);
+ return Result;
+ }
+ default:
+ dbgs() << "Unhandled Cmp predicate\n";
+ llvm_unreachable(0);
+ }
+}
+
+void Interpreter::visitBinaryOperator(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
+ GenericValue Src3) {
+ return Src1.IntVal == 0 ? Src3 : Src2;
+}
+
+void Interpreter::visitSelectInst(SelectInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue R = executeSelectInst(Src1, Src2, Src3);
+ SetValue(&I, R, SF);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Terminator Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::exitCalled(GenericValue GV) {
+ // runAtExitHandlers() assumes there are no stack frames, but
+ // if exit() was called, then it had a stack frame. Blow away
+ // the stack before interpreting atexit handlers.
+ ECStack.clear();
+ runAtExitHandlers();
+ exit(GV.IntVal.zextOrTrunc(32).getZExtValue());
+}
+
+/// Pop the last stack frame off of ECStack and then copy the result
+/// back into the result variable if we are not returning void. The
+/// result variable may be the ExitValue, or the Value of the calling
+/// CallInst if there was a previous stack frame. This method may
+/// invalidate any ECStack iterators you have. This method also takes
+/// care of switching to the normal destination BB, if we are returning
+/// from an invoke.
+///
+void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
+ GenericValue Result) {
+ // Pop the current stack frame.
+ ECStack.pop_back();
+
+ if (ECStack.empty()) { // Finished main. Put result into exit code...
+ if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type?
+ ExitValue = Result; // Capture the exit value of the program
+ } else {
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ }
+ } else {
+ // If we have a previous stack frame, and we have a previous call,
+ // fill in the return value...
+ ExecutionContext &CallingSF = ECStack.back();
+ if (Instruction *I = CallingSF.Caller.getInstruction()) {
+ // Save result...
+ if (!CallingSF.Caller.getType()->isVoidTy())
+ SetValue(I, Result, CallingSF);
+ if (InvokeInst *II = dyn_cast<InvokeInst> (I))
+ SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
+ CallingSF.Caller = CallSite(); // We returned from the call...
+ }
+ }
+}
+
+void Interpreter::visitReturnInst(ReturnInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *RetTy = Type::getVoidTy(I.getContext());
+ GenericValue Result;
+
+ // Save away the return value... (if we are not 'ret void')
+ if (I.getNumOperands()) {
+ RetTy = I.getReturnValue()->getType();
+ Result = getOperandValue(I.getReturnValue(), SF);
+ }
+
+ popStackAndReturnValueToCaller(RetTy, Result);
+}
+
+void Interpreter::visitUnwindInst(UnwindInst &I) {
+ // Unwind stack
+ Instruction *Inst;
+ do {
+ ECStack.pop_back();
+ if (ECStack.empty())
+ report_fatal_error("Empty stack during unwind!");
+ Inst = ECStack.back().Caller.getInstruction();
+ } while (!(Inst && isa<InvokeInst>(Inst)));
+
+ // Return from invoke
+ ExecutionContext &InvokingSF = ECStack.back();
+ InvokingSF.Caller = CallSite();
+
+ // Go to exceptional destination BB of invoke instruction
+ SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
+}
+
+void Interpreter::visitUnreachableInst(UnreachableInst &I) {
+ report_fatal_error("Program executed an 'unreachable' instruction!");
+}
+
+void Interpreter::visitBranchInst(BranchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ BasicBlock *Dest;
+
+ Dest = I.getSuccessor(0); // Uncond branches have a fixed dest...
+ if (!I.isUnconditional()) {
+ Value *Cond = I.getCondition();
+ if (getOperandValue(Cond, SF).IntVal == 0) // If false cond...
+ Dest = I.getSuccessor(1);
+ }
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitSwitchInst(SwitchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue CondVal = getOperandValue(I.getOperand(0), SF);
+ const Type *ElTy = I.getOperand(0)->getType();
+
+ // Check to see if any of the cases match...
+ BasicBlock *Dest = 0;
+ for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2)
+ if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy)
+ .IntVal != 0) {
+ Dest = cast<BasicBlock>(I.getOperand(i+1));
+ break;
+ }
+
+ if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitIndirectBrInst(IndirectBrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ void *Dest = GVTOP(getOperandValue(I.getAddress(), SF));
+ SwitchToNewBasicBlock((BasicBlock*)Dest, SF);
+}
+
+
+// SwitchToNewBasicBlock - This method is used to jump to a new basic block.
+// This function handles the actual updating of block and instruction iterators
+// as well as execution of all of the PHI nodes in the destination block.
+//
+// This method does this because all of the PHI nodes must be executed
+// atomically, reading their inputs before any of the results are updated. Not
+// doing this can cause problems if the PHI nodes depend on other PHI nodes for
+// their inputs. If the input PHI node is updated before it is read, incorrect
+// results can happen. Thus we use a two phase approach.
+//
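+// As a hypothetical example, a block such as
+//
+//   loop:
+//     %a = phi i32 [ 0, %entry ], [ %b, %loop ]
+//     %b = phi i32 [ 1, %entry ], [ %a, %loop ]
+//
+// swaps %a and %b on every iteration; if %a were updated before %b's incoming
+// value was read, both PHIs would see the new %a. Reading every input first
+// and only then writing the results preserves the swap.
+//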
+void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
+ BasicBlock *PrevBB = SF.CurBB; // Remember where we came from...
+ SF.CurBB = Dest; // Update CurBB to branch destination
+ SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr...
+
+ if (!isa<PHINode>(SF.CurInst)) return; // Nothing fancy to do
+
+ // Loop over all of the PHI nodes in the current block, reading their inputs.
+ std::vector<GenericValue> ResultValues;
+
+ for (; PHINode *PN = dyn_cast<PHINode>(SF.CurInst); ++SF.CurInst) {
+ // Search for the value corresponding to this previous bb...
+ int i = PN->getBasicBlockIndex(PrevBB);
+ assert(i != -1 && "PHINode doesn't contain entry for predecessor??");
+ Value *IncomingValue = PN->getIncomingValue(i);
+
+ // Save the incoming value for this PHI node...
+ ResultValues.push_back(getOperandValue(IncomingValue, SF));
+ }
+
+ // Now loop over all of the PHI nodes setting their values...
+ SF.CurInst = SF.CurBB->begin();
+ for (unsigned i = 0; isa<PHINode>(SF.CurInst); ++SF.CurInst, ++i) {
+ PHINode *PN = cast<PHINode>(SF.CurInst);
+ SetValue(PN, ResultValues[i], SF);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitAllocaInst(AllocaInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ const Type *Ty = I.getType()->getElementType(); // Type to be allocated
+
+ // Get the number of elements being allocated by the array...
+ unsigned NumElements =
+ getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
+
+ unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty);
+
+ // Avoid malloc-ing zero bytes, use max()...
+ unsigned MemToAlloc = std::max(1U, NumElements * TypeSize);
+
+ // Allocate enough memory to hold the type...
+ void *Memory = malloc(MemToAlloc);
+
+ DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ << NumElements << " (Total: " << MemToAlloc << ") at "
+ << uintptr_t(Memory) << '\n');
+
+ GenericValue Result = PTOGV(Memory);
+ assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
+ SetValue(&I, Result, SF);
+
+ if (I.getOpcode() == Instruction::Alloca)
+ ECStack.back().Allocas.add(Memory);
+}
+
+// executeGEPOperation - The workhorse for getelementptr: it walks the index
+// list and accumulates the byte offset from the base pointer.
+//
+GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E,
+ ExecutionContext &SF) {
+ assert(Ptr->getType()->isPointerTy() &&
+ "Cannot getElementOffset of a nonpointer type!");
+
+ uint64_t Total = 0;
+
+ for (; I != E; ++I) {
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ const StructLayout *SLO = TD.getStructLayout(STy);
+
+ const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
+ unsigned Index = unsigned(CPU->getZExtValue());
+
+ Total += SLO->getElementOffset(Index);
+ } else {
+ const SequentialType *ST = cast<SequentialType>(*I);
+ // Get the index number for the array... which must be a 32- or 64-bit
+ // integer type.
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
+
+ int64_t Idx;
+ unsigned BitWidth =
+ cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
+ if (BitWidth == 32)
+ Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
+ else {
+ assert(BitWidth == 64 && "Invalid index type for getelementptr");
+ Idx = (int64_t)IdxGV.IntVal.getZExtValue();
+ }
+ Total += TD.getTypeAllocSize(ST->getElementType())*Idx;
+ }
+ }
+
+ GenericValue Result;
+ Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
+ DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n");
+ return Result;
+}
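+
+// A worked example (hypothetical IR, assuming the usual x86 struct layout in
+// which { i32, double } is 16 bytes with the double at offset 8):
+//
+//   getelementptr { i32, double }* %p, i64 2, i32 1
+//
+// The i64 index steps over two whole structs (Total += 2 * 16) and the i32
+// index adds the field offset from the StructLayout (Total += 8), so the
+// result is (char*)%p + 40.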
+
+void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeGEPOperation(I.getPointerOperand(),
+ gep_type_begin(I), gep_type_end(I), SF), SF);
+}
+
+void Interpreter::visitLoadInst(LoadInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ GenericValue *Ptr = (GenericValue*)GVTOP(SRC);
+ GenericValue Result;
+ LoadValueFromMemory(Result, Ptr, I.getType());
+ SetValue(&I, Result, SF);
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile load " << I;
+}
+
+void Interpreter::visitStoreInst(StoreInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Val = getOperandValue(I.getOperand(0), SF);
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
+ I.getOperand(0)->getType());
+ if (I.isVolatile() && PrintVolatile)
+ dbgs() << "Volatile store: " << I;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitCallSite(CallSite CS) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Check to see if this is an intrinsic function call...
+ Function *F = CS.getCalledFunction();
+ if (F && F->isDeclaration())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ break;
+ case Intrinsic::vastart: { // va_start
+ GenericValue ArgIndex;
+ ArgIndex.UIntPairVal.first = ECStack.size() - 1;
+ ArgIndex.UIntPairVal.second = 0;
+ SetValue(CS.getInstruction(), ArgIndex, SF);
+ return;
+ }
+ case Intrinsic::vaend: // va_end is a noop for the interpreter
+ return;
+ case Intrinsic::vacopy: // va_copy: dest = src
+ SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF);
+ return;
+ default:
+ // If it is an unknown intrinsic function, use the intrinsic lowering
+ // class to transform it into hopefully tasty LLVM code.
+ //
+ BasicBlock::iterator me(CS.getInstruction());
+ BasicBlock *Parent = CS.getInstruction()->getParent();
+ bool atBegin(Parent->begin() == me);
+ if (!atBegin)
+ --me;
+ IL->LowerIntrinsicCall(cast<CallInst>(CS.getInstruction()));
+
+ // Restore the CurInst pointer to the first instruction newly inserted, if
+ // any.
+ if (atBegin) {
+ SF.CurInst = Parent->begin();
+ } else {
+ SF.CurInst = me;
+ ++SF.CurInst;
+ }
+ return;
+ }
+
+
+ SF.Caller = CS;
+ std::vector<GenericValue> ArgVals;
+ const unsigned NumArgs = SF.Caller.arg_size();
+ ArgVals.reserve(NumArgs);
+ uint16_t pNum = 1;
+ for (CallSite::arg_iterator i = SF.Caller.arg_begin(),
+ e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
+ Value *V = *i;
+ ArgVals.push_back(getOperandValue(V, SF));
+ }
+
+ // To handle indirect calls, we must get the pointer value from the argument
+ // and treat it as a function pointer.
+ GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF);
+ callFunction((Function*)GVTOP(SRC), ArgVals);
+}
+
+void Interpreter::visitShl(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitLShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitAShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.sext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.zext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.FloatVal = (float) Src.DoubleVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
+ "Invalid FPTrunc instruction");
+ Dest.DoubleVal = (double) Src.FloatVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
+ return Dest;
+
+}
+
+GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction");
+
+ Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isPointerTy() && "Invalid IntToPtr instruction");
+
+ uint32_t PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize != Src.IntVal.getBitWidth())
+ Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
+
+ Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue()));
+ return Dest;
+}
+
+GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+
+ const Type *SrcTy = SrcVal->getType();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ if (DstTy->isPointerTy()) {
+ assert(SrcTy->isPointerTy() && "Invalid BitCast");
+ Dest.PointerVal = Src.PointerVal;
+ } else if (DstTy->isIntegerTy()) {
+ if (SrcTy->isFloatTy()) {
+ Dest.IntVal = APInt::floatToBits(Src.FloatVal);
+ } else if (SrcTy->isDoubleTy()) {
+ Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
+ } else if (SrcTy->isIntegerTy()) {
+ Dest.IntVal = Src.IntVal;
+ } else
+ llvm_unreachable("Invalid BitCast");
+ } else if (DstTy->isFloatTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.FloatVal = Src.IntVal.bitsToFloat();
+ else
+ Dest.FloatVal = Src.FloatVal;
+ } else if (DstTy->isDoubleTy()) {
+ if (SrcTy->isIntegerTy())
+ Dest.DoubleVal = Src.IntVal.bitsToDouble();
+ else
+ Dest.DoubleVal = Src.DoubleVal;
+ } else
+ llvm_unreachable("Invalid Bitcast");
+
+ return Dest;
+}
+
+void Interpreter::visitTruncInst(TruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSExtInst(SExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitZExtInst(ZExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPTruncInst(FPTruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPExtInst(FPExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitUIToFPInst(UIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSIToFPInst(SIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToUIInst(FPToUIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToSIInst(FPToSIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitPtrToIntInst(PtrToIntInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitIntToPtrInst(IntToPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitBitCastInst(BitCastInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+#define IMPLEMENT_VAARG(TY) \
+ case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break
+
+void Interpreter::visitVAArgInst(VAArgInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Get the incoming valist parameter. LLI treats the valist as a
+ // (ec-stack-depth var-arg-index) pair.
+ GenericValue VAList = getOperandValue(I.getOperand(0), SF);
+ GenericValue Dest;
+ GenericValue Src = ECStack[VAList.UIntPairVal.first]
+ .VarArgs[VAList.UIntPairVal.second];
+ const Type *Ty = I.getType();
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: Dest.IntVal = Src.IntVal; break;
+ IMPLEMENT_VAARG(Pointer);
+ IMPLEMENT_VAARG(Float);
+ IMPLEMENT_VAARG(Double);
+ default:
+ dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+
+ // Set the Value of this Instruction.
+ SetValue(&I, Dest, SF);
+
+ // Move the pointer to the next vararg.
+ ++VAList.UIntPairVal.second;
+}
+
+GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
+ ExecutionContext &SF) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::ZExt:
+ return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SExt:
+ return executeSExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPTrunc:
+ return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPExt:
+ return executeFPExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::UIToFP:
+ return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SIToFP:
+ return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToUI:
+ return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToSI:
+ return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::PtrToInt:
+ return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::IntToPtr:
+ return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::BitCast:
+ return executeBitCastInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::GetElementPtr:
+ return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE),
+ gep_type_end(CE), SF);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return executeCmpInst(CE->getPredicate(),
+ getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ CE->getOperand(0)->getType());
+ case Instruction::Select:
+ return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ getOperandValue(CE->getOperand(2), SF));
+ default :
+ break;
+ }
+
+ // The cases below here require a GenericValue parameter for the result
+ // so we initialize one, compute it and then return it.
+ GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
+ GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
+ GenericValue Dest;
+ const Type * Ty = CE->getOperand(0)->getType();
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
+ case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
+ case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
+ case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
+ case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
+ case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
+ case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
+ case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
+ case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
+ case Instruction::Shl:
+ Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::LShr:
+ Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::AShr:
+ Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
+ break;
+ default:
+ dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
+ llvm_unreachable(0);
+ return GenericValue();
+ }
+ return Dest;
+}
+
+GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return getConstantExprValue(CE, SF);
+ } else if (Constant *CPV = dyn_cast<Constant>(V)) {
+ return getConstantValue(CPV);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ return PTOGV(getPointerToGlobal(GV));
+ } else {
+ return SF.Values[V];
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Dispatch and Execution Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// callFunction - Execute the specified function...
+//
+void Interpreter::callFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+ ECStack.back().Caller.arg_size() == ArgVals.size()) &&
+ "Incorrect number of arguments passed into function call!");
+ // Make a new stack frame... and fill it in.
+ ECStack.push_back(ExecutionContext());
+ ExecutionContext &StackFrame = ECStack.back();
+ StackFrame.CurFunction = F;
+
+ // Special handling for external functions.
+ if (F->isDeclaration()) {
+ GenericValue Result = callExternalFunction (F, ArgVals);
+ // Simulate a 'ret' instruction of the appropriate type.
+ popStackAndReturnValueToCaller (F->getReturnType (), Result);
+ return;
+ }
+
+ // Get pointers to first LLVM BB & Instruction in function.
+ StackFrame.CurBB = F->begin();
+ StackFrame.CurInst = StackFrame.CurBB->begin();
+
+ // Run through the function arguments and initialize their values...
+ assert((ArgVals.size() == F->arg_size() ||
+ (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&&
+ "Invalid number of values passed to function invocation!");
+
+ // Handle non-varargs arguments...
+ unsigned i = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++i)
+ SetValue(AI, ArgVals[i], StackFrame);
+
+ // Handle varargs arguments...
+ StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end());
+}
+
+
+void Interpreter::run() {
+ while (!ECStack.empty()) {
+ // Interpret a single instruction & increment the "PC".
+ ExecutionContext &SF = ECStack.back(); // Current stack frame
+ Instruction &I = *SF.CurInst++; // Increment before execute
+
+ // Track the number of dynamic instructions executed.
+ ++NumDynamicInsts;
+
+ DEBUG(dbgs() << "About to interpret: " << I);
+ visit(I); // Dispatch to one of the visit* methods...
+#if 0
+ // This is not safe, as visiting the instruction could lower it and free I.
+DEBUG(
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
+ I.getType() != Type::VoidTy) {
+ dbgs() << " --> ";
+ const GenericValue &Val = SF.Values[&I];
+ switch (I.getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid GenericValue Type");
+ case Type::VoidTyID: dbgs() << "void"; break;
+ case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal);
+ break;
+ case Type::IntegerTyID:
+ dbgs() << "i" << Val.IntVal.getBitWidth() << " "
+ << Val.IntVal.toStringUnsigned(10)
+ << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
+ break;
+ }
+ });
+#endif
+ }
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
new file mode 100644
index 000000000000..f7e2a4df951e
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -0,0 +1,491 @@
+//===-- ExternalFunctions.cpp - Implement External Functions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains both the code to deal with invoking "external" functions
+// and the code that implements "exported" external functions.
+//
+// There are currently two mechanisms for handling external functions in the
+// Interpreter. The first is to implement lle_* wrapper functions that are
+// specific to well-known library functions which manually translate the
+// arguments from GenericValues and make the call. If such a wrapper does
+// not exist, and libffi is available, then the Interpreter will attempt to
+// invoke the function using libffi, after finding its address.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Config/config.h" // Detect libffi
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include <csignal>
+#include <cstdio>
+#include <map>
+#include <cmath>
+#include <cstring>
+
+#ifdef HAVE_FFI_CALL
+#ifdef HAVE_FFI_H
+#include <ffi.h>
+#define USE_LIBFFI
+#elif HAVE_FFI_FFI_H
+#include <ffi/ffi.h>
+#define USE_LIBFFI
+#endif
+#endif
+
+using namespace llvm;
+
+static ManagedStatic<sys::Mutex> FunctionsLock;
+
+typedef GenericValue (*ExFunc)(const FunctionType *,
+ const std::vector<GenericValue> &);
+static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
+static std::map<std::string, ExFunc> FuncNames;
+
+#ifdef USE_LIBFFI
+typedef void (*RawFunc)();
+static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
+#endif
+
+static Interpreter *TheInterpreter;
+
+static char getTypeID(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return 'V';
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 1: return 'o';
+ case 8: return 'B';
+ case 16: return 'S';
+ case 32: return 'I';
+ case 64: return 'L';
+ default: return 'N';
+ }
+ case Type::FloatTyID: return 'F';
+ case Type::DoubleTyID: return 'D';
+ case Type::PointerTyID: return 'P';
+ case Type::FunctionTyID:return 'M';
+ case Type::StructTyID: return 'T';
+ case Type::ArrayTyID: return 'A';
+ default: return 'U';
+ }
+}
+
+// Try to find the address of an external function, given a Function object.
+// Note that the interpreter does not know how to assemble a real call in the
+// general case (that is the JIT's job), so it assumes that all external
+// functions have the same (and fairly "general") signature. The typical
+// examples of such functions are the "lle_X_" ones.
+static ExFunc lookupFunction(const Function *F) {
+ // Function not found, look it up... start by figuring out what the
+ // composite function name should be.
+ std::string ExtName = "lle_";
+ const FunctionType *FT = F->getFunctionType();
+ for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
+ ExtName += getTypeID(FT->getContainedType(i));
+  ExtName += "_" + F->getNameStr();
+
+ sys::ScopedLock Writer(*FunctionsLock);
+ ExFunc FnPtr = FuncNames[ExtName];
+ if (FnPtr == 0)
+ FnPtr = FuncNames["lle_X_" + F->getNameStr()];
+ if (FnPtr == 0) // Try calling a generic function... if it exists...
+ FnPtr = (ExFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr());
+ if (FnPtr != 0)
+ ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
+ return FnPtr;
+}
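+
+// Example (hypothetical, illustrative only): a declaration such as
+// "i32 @hook(i8*)" is first looked up under the typed name "lle_IP_hook"
+// (return type 'I', then one 'P' parameter, following getTypeID above) and
+// then under the generic name "lle_X_hook". A wrapper like the one below,
+// if it were linked into the process, would be found by the
+// SearchForAddressOfSymbol() call above; every such handler must use this
+// generic signature.
+extern "C" GenericValue lle_X_hook(const FunctionType *FT,
+                                   const std::vector<GenericValue> &Args) {
+  GenericValue GV;
+  GV.IntVal = APInt(32, 0);    // e.g. report success back to the IR caller
+  return GV;
+}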
+
+#ifdef USE_LIBFFI
+static ffi_type *ffiTypeFor(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return &ffi_type_void;
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: return &ffi_type_sint8;
+ case 16: return &ffi_type_sint16;
+ case 32: return &ffi_type_sint32;
+ case 64: return &ffi_type_sint64;
+  }
+  break;
+ case Type::FloatTyID: return &ffi_type_float;
+ case Type::DoubleTyID: return &ffi_type_double;
+ case Type::PointerTyID: return &ffi_type_pointer;
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
+ void *ArgDataPtr) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: {
+ int8_t *I8Ptr = (int8_t *) ArgDataPtr;
+ *I8Ptr = (int8_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 16: {
+ int16_t *I16Ptr = (int16_t *) ArgDataPtr;
+ *I16Ptr = (int16_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 32: {
+ int32_t *I32Ptr = (int32_t *) ArgDataPtr;
+ *I32Ptr = (int32_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 64: {
+ int64_t *I64Ptr = (int64_t *) ArgDataPtr;
+ *I64Ptr = (int64_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+  }
+  break;
+ case Type::FloatTyID: {
+ float *FloatPtr = (float *) ArgDataPtr;
+ *FloatPtr = AV.FloatVal;
+ return ArgDataPtr;
+ }
+ case Type::DoubleTyID: {
+ double *DoublePtr = (double *) ArgDataPtr;
+ *DoublePtr = AV.DoubleVal;
+ return ArgDataPtr;
+ }
+ case Type::PointerTyID: {
+ void **PtrPtr = (void **) ArgDataPtr;
+ *PtrPtr = GVTOP(AV);
+ return ArgDataPtr;
+ }
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ report_fatal_error("Type value could not be mapped for use with libffi.");
+ return NULL;
+}
+
+static bool ffiInvoke(RawFunc Fn, Function *F,
+ const std::vector<GenericValue> &ArgVals,
+ const TargetData *TD, GenericValue &Result) {
+ ffi_cif cif;
+ const FunctionType *FTy = F->getFunctionType();
+ const unsigned NumArgs = F->arg_size();
+
+ // TODO: We don't have type information about the remaining arguments, because
+ // this information is never passed into ExecutionEngine::runFunction().
+ if (ArgVals.size() > NumArgs && F->isVarArg()) {
+ report_fatal_error("Calling external var arg function '" + F->getName()
+ + "' is not supported by the Interpreter.");
+ }
+
+ unsigned ArgBytes = 0;
+
+ std::vector<ffi_type*> args(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ args[ArgNo] = ffiTypeFor(ArgTy);
+ ArgBytes += TD->getTypeStoreSize(ArgTy);
+ }
+
+ SmallVector<uint8_t, 128> ArgData;
+ ArgData.resize(ArgBytes);
+ uint8_t *ArgDataPtr = ArgData.data();
+ SmallVector<void*, 16> values(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
+ ArgDataPtr += TD->getTypeStoreSize(ArgTy);
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ ffi_type *rtype = ffiTypeFor(RetTy);
+
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ SmallVector<uint8_t, 128> ret;
+ if (RetTy->getTypeID() != Type::VoidTyID)
+ ret.resize(TD->getTypeStoreSize(RetTy));
+ ffi_call(&cif, Fn, ret.data(), values.data());
+ switch (RetTy->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(RetTy)->getBitWidth()) {
+ case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break;
+ case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break;
+ case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break;
+ case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break;
+ }
+ break;
+ case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break;
+ case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break;
+ case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break;
+ default: break;
+ }
+ return true;
+ }
+
+ return false;
+}
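+
+// For reference, a minimal sketch of what the machinery above reduces to for
+// a declared-but-undefined "double sin(double)": one fixed argument, a double
+// return slot, and a raw libffi call. This helper is illustrative only and is
+// not used by the interpreter itself.
+static double ffiInvokeSinExample(double X) {
+  double (*SinFn)(double) = std::sin;             // the resolved native symbol
+  ffi_cif cif;
+  ffi_type *ArgTypes[1] = { &ffi_type_double };
+  void *ArgValues[1] = { &X };
+  double Ret = 0.0;
+  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_double, ArgTypes) ==
+      FFI_OK)
+    ffi_call(&cif, (void (*)())(intptr_t)SinFn, &Ret, ArgValues);
+  return Ret;
+}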
+#endif // USE_LIBFFI
+
+GenericValue Interpreter::callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ TheInterpreter = this;
+
+ FunctionsLock->acquire();
+
+ // Do a lookup to see if the function is in our cache... this should just be a
+ // deferred annotation!
+ std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
+ if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
+ : FI->second) {
+ FunctionsLock->release();
+ return Fn(F->getFunctionType(), ArgVals);
+ }
+
+#ifdef USE_LIBFFI
+ std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F);
+ RawFunc RawFn;
+ if (RF == RawFunctions->end()) {
+ RawFn = (RawFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
+ if (!RawFn)
+ RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
+ if (RawFn != 0)
+ RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
+ } else {
+ RawFn = RF->second;
+ }
+
+ FunctionsLock->release();
+
+ GenericValue Result;
+ if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result))
+ return Result;
+#endif // USE_LIBFFI
+
+ if (F->getName() == "__main")
+ errs() << "Tried to execute an unknown external function: "
+ << *F->getType() << " __main\n";
+ else
+ report_fatal_error("Tried to execute an unknown external function: " +
+ F->getName());
+#ifndef USE_LIBFFI
+ errs() << "Recompiling LLVM with --enable-libffi might help.\n";
+#endif
+ return GenericValue();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions "exported" to the running application...
+//
+
+// Visual Studio warns about returning GenericValue in extern "C" linkage
+#ifdef _MSC_VER
+ #pragma warning(disable : 4190)
+#endif
+
+extern "C" { // Don't add C++ manglings to llvm mangling :)
+
+// void atexit(Function*)
+GenericValue lle_X_atexit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() == 1);
+ TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
+// void exit(int)
+GenericValue lle_X_exit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ TheInterpreter->exitCalled(Args[0]);
+ return GenericValue();
+}
+
+// void abort(void)
+GenericValue lle_X_abort(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ //FIXME: should we report or raise here?
+ //report_fatal_error("Interpreted program raised SIGABRT");
+ raise (SIGABRT);
+ return GenericValue();
+}
+
+// int sprintf(char *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_sprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char *OutputBuffer = (char *)GVTOP(Args[0]);
+ const char *FmtStr = (const char *)GVTOP(Args[1]);
+ unsigned ArgNo = 2;
+
+ // printf should return # chars printed. This is completely incorrect, but
+ // close enough for now.
+ GenericValue GV;
+ GV.IntVal = APInt(32, strlen(FmtStr));
+ while (1) {
+ switch (*FmtStr) {
+ case 0: return GV; // Null terminator...
+ default: // Normal nonspecial character
+ sprintf(OutputBuffer++, "%c", *FmtStr++);
+ break;
+ case '\\': { // Handle escape codes
+ sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1));
+ FmtStr += 2; OutputBuffer += 2;
+ break;
+ }
+ case '%': { // Handle format specifiers
+ char FmtBuf[100] = "", Buffer[1000] = "";
+ char *FB = FmtBuf;
+ *FB++ = *FmtStr++;
+ char Last = *FB++ = *FmtStr++;
+ unsigned HowLong = 0;
+ while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' &&
+ Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' &&
+ Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' &&
+ Last != 'p' && Last != 's' && Last != '%') {
+ if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's
+ Last = *FB++ = *FmtStr++;
+ }
+ *FB = 0;
+
+ switch (Last) {
+ case '%':
+ memcpy(Buffer, "%", 2); break;
+ case 'c':
+ sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'd': case 'i':
+ case 'u': case 'o':
+ case 'x': case 'X':
+ if (HowLong >= 1) {
+ if (HowLong == 1 &&
+ TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 &&
+ sizeof(long) < sizeof(int64_t)) {
+ // Make sure we use %lld with a 64 bit argument because we might be
+ // compiling LLI on a 32 bit compiler.
+ unsigned Size = strlen(FmtBuf);
+ FmtBuf[Size] = FmtBuf[Size-1];
+ FmtBuf[Size+1] = 0;
+ FmtBuf[Size-1] = 'l';
+ }
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue());
+ } else
+ sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'e': case 'E': case 'g': case 'G': case 'f':
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break;
+ case 'p':
+ sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
+ case 's':
+ sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
+ default:
+ errs() << "<unknown printf code '" << *FmtStr << "'!>";
+ ArgNo++; break;
+ }
+ size_t Len = strlen(Buffer);
+ memcpy(OutputBuffer, Buffer, Len + 1);
+ OutputBuffer += Len;
+ }
+ break;
+ }
+ }
+ return GV;
+}
+
+// int printf(const char *, ...) - a very rough implementation to make output
+// useful.
+GenericValue lle_X_printf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV((void*)&Buffer[0]));
+ NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+ outs() << Buffer;
+ return GV;
+}
+
+// int sscanf(const char *str, const char *format, ...);
+GenericValue lle_X_sscanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int scanf(const char *format, ...);
+GenericValue lle_X_scanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ return GV;
+}
+
+// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_fprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() >= 2);
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV(Buffer));
+ NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+
+ fputs(Buffer, (FILE *) GVTOP(Args[0]));
+ return GV;
+}
+
+} // End extern "C"
+
+// Done with externals; turn the warning back on
+#ifdef _MSC_VER
+ #pragma warning(default: 4190)
+#endif
+
+
+void Interpreter::initializeExternalFunctions() {
+ sys::ScopedLock Writer(*FunctionsLock);
+ FuncNames["lle_X_atexit"] = lle_X_atexit;
+ FuncNames["lle_X_exit"] = lle_X_exit;
+ FuncNames["lle_X_abort"] = lle_X_abort;
+
+ FuncNames["lle_X_printf"] = lle_X_printf;
+ FuncNames["lle_X_sprintf"] = lle_X_sprintf;
+ FuncNames["lle_X_sscanf"] = lle_X_sscanf;
+ FuncNames["lle_X_scanf"] = lle_X_scanf;
+ FuncNames["lle_X_fprintf"] = lle_X_fprintf;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
new file mode 100644
index 000000000000..43e34533c7ba
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -0,0 +1,98 @@
+//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top-level functionality for the LLVM interpreter.
+// This interpreter is designed to be a very simple, portable, inefficient
+// interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include <cstring>
+using namespace llvm;
+
+namespace {
+
+static struct RegisterInterp {
+ RegisterInterp() { Interpreter::Register(); }
+} InterpRegistrator;
+
+}
+
+extern "C" void LLVMLinkInInterpreter() { }
+
+/// create - Create a new interpreter object. This can only fail if the
+/// Module cannot be fully materialized.
+///
+ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
+ // Tell this Module to materialize everything and release the GVMaterializer.
+ if (M->MaterializeAllPermanently(ErrStr))
+ // We got an error, just return 0
+ return 0;
+
+ return new Interpreter(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Interpreter ctor - Initialize stuff
+//
+Interpreter::Interpreter(Module *M)
+ : ExecutionEngine(M), TD(M) {
+
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ setTargetData(&TD);
+ // Initialize the "backend"
+ initializeExecutionEngine();
+ initializeExternalFunctions();
+ emitGlobals();
+
+ IL = new IntrinsicLowering(TD);
+}
+
+Interpreter::~Interpreter() {
+ delete IL;
+}
+
+void Interpreter::runAtExitHandlers () {
+ while (!AtExitHandlers.empty()) {
+ callFunction(AtExitHandlers.back(), std::vector<GenericValue>());
+ AtExitHandlers.pop_back();
+ run();
+ }
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue
+Interpreter::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert (F && "Function *F was null at entry to run()");
+
+ // Try extra hard not to pass extra args to a function that isn't
+ // expecting them. C programmers frequently bend the rules and
+ // declare main() with fewer parameters than it actually gets
+ // passed, and the interpreter barfs if you pass a function more
+ // parameters than it is declared to take. This does not attempt to
+ // take into account gratuitous differences in declared types,
+ // though.
+ std::vector<GenericValue> ActualArgs;
+ const unsigned ArgCount = F->getFunctionType()->getNumParams();
+ for (unsigned i = 0; i < ArgCount; ++i)
+ ActualArgs.push_back(ArgValues[i]);
+
+ // Set up the function call.
+ callFunction(F, ActualArgs);
+
+ // Start executing the function.
+ run();
+
+ return ExitValue;
+}
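+
+// A minimal usage sketch (illustrative only; not used by lli): build a
+// trivial module, select the interpreter engine through EngineBuilder, and
+// go through runFunction() above. The function name "forty_two" is invented
+// for the example.
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+static GenericValue interpretFortyTwoExample() {
+  LLVMContext Context;
+  Module *M = new Module("example", Context);
+
+  // Equivalent IR:  define i32 @forty_two() { ret i32 42 }
+  Function *F = Function::Create(
+      FunctionType::get(Type::getInt32Ty(Context), false),
+      Function::ExternalLinkage, "forty_two", M);
+  BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+  ReturnInst::Create(Context,
+                     ConstantInt::get(Type::getInt32Ty(Context), 42), BB);
+
+  std::string Err;
+  ExecutionEngine *EE = EngineBuilder(M)
+                            .setEngineKind(EngineKind::Interpreter)
+                            .setErrorStr(&Err)
+                            .create();             // ends up in create() above
+  if (!EE) {
+    errs() << "Could not build interpreter: " << Err << "\n";
+    return GenericValue();
+  }
+  std::vector<GenericValue> NoArgs;
+  GenericValue Result = EE->runFunction(F, NoArgs); // Result.IntVal == 42
+  delete EE;                                        // also deletes M
+  return Result;
+}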
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
new file mode 100644
index 000000000000..bfebe3debfcd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -0,0 +1,242 @@
+//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interpreter structure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLI_INTERPRETER_H
+#define LLI_INTERPRETER_H
+
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+
+class IntrinsicLowering;
+struct FunctionInfo;
+template<typename T> class generic_gep_type_iterator;
+class ConstantExpr;
+typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
+
+
+// AllocaHolder - Object to track all of the blocks of memory allocated by
+// alloca. When the function returns, this object is popped off the execution
+// stack, which causes the dtor to be run, which frees all the alloca'd memory.
+//
+class AllocaHolder {
+ friend class AllocaHolderHandle;
+ std::vector<void*> Allocations;
+ unsigned RefCnt;
+public:
+ AllocaHolder() : RefCnt(0) {}
+ void add(void *mem) { Allocations.push_back(mem); }
+ ~AllocaHolder() {
+ for (unsigned i = 0; i < Allocations.size(); ++i)
+ free(Allocations[i]);
+ }
+};
+
+// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
+// a vector...
+//
+class AllocaHolderHandle {
+ AllocaHolder *H;
+public:
+ AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
+ AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
+ ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+
+ void add(void *mem) { H->add(mem); }
+};
+
+typedef std::vector<GenericValue> ValuePlaneTy;
+
+// ExecutionContext struct - This struct represents one stack frame currently
+// executing.
+//
+struct ExecutionContext {
+ Function *CurFunction;// The currently executing function
+ BasicBlock *CurBB; // The currently executing BB
+ BasicBlock::iterator CurInst; // The next instruction to execute
+ std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+ std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
+ CallSite Caller; // Holds the call that called subframes.
+ // NULL if main func or debugger invoked fn
+ AllocaHolderHandle Allocas; // Track memory allocated by alloca
+};
+
+// Interpreter - This class represents the entirety of the interpreter.
+//
+class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
+ GenericValue ExitValue; // The return value of the called function
+ TargetData TD;
+ IntrinsicLowering *IL;
+
+ // The runtime stack of executing code. The top of the stack is the current
+ // function record.
+ std::vector<ExecutionContext> ECStack;
+
+ // AtExitHandlers - List of functions to call when the program exits,
+ // registered with the atexit() library function.
+ std::vector<Function*> AtExitHandlers;
+
+public:
+ explicit Interpreter(Module *M);
+ ~Interpreter();
+
+ /// runAtExitHandlers - Run any functions registered by the program's calls to
+ /// atexit(3), which we intercept and store in AtExitHandlers.
+ ///
+ void runAtExitHandlers();
+
+ static void Register() {
+ InterpCtor = create;
+ }
+
+ /// create - Create an interpreter ExecutionEngine. This can only fail if
+ /// the Module cannot be fully materialized.
+ ///
+ static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0);
+
+ /// run - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// recompileAndRelinkFunction - For the interpreter, functions are always
+ /// up-to-date.
+ ///
+ virtual void *recompileAndRelinkFunction(Function *F) {
+ return getPointerToFunction(F);
+ }
+
+ /// freeMachineCodeForFunction - The interpreter does not generate any code.
+ ///
+ void freeMachineCodeForFunction(Function *F) { }
+
+ // Methods used to execute code:
+ // Place a call on the stack
+ void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+ void run(); // Execute instructions until nothing left to do
+
+ // Opcode Implementations
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+
+ void visitBinaryOperator(BinaryOperator &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
+ }
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitSelectInst(SelectInst &I);
+
+
+ void visitCallSite(CallSite CS);
+ void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
+ void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
+ void visitUnwindInst(UnwindInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitShl(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+
+ void visitVAArgInst(VAArgInst &I);
+ void visitInstruction(Instruction &I) {
+ errs() << I;
+ llvm_unreachable("Instruction not interpretable yet!");
+ }
+
+ GenericValue callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals);
+ void exitCalled(GenericValue GV);
+
+ void addAtExitHandler(Function *F) {
+ AtExitHandlers.push_back(F);
+ }
+
+ GenericValue *getFirstVarArg () {
+ return &(ECStack.back ().VarArgs[0]);
+ }
+
+private: // Helper functions
+ GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, ExecutionContext &SF);
+
+ // SwitchToNewBasicBlock - Start execution in a new basic block and run any
+ // PHI nodes in the top of the block. This is used for intraprocedural
+ // control flow.
+ //
+ void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
+
+ void *getPointerToFunction(Function *F) { return (void*)F; }
+ void *getPointerToBasicBlock(BasicBlock *BB) { return (void*)BB; }
+
+ void initializeExecutionEngine() { }
+ void initializeExternalFunctions();
+ GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
+ GenericValue getOperandValue(Value *V, ExecutionContext &SF);
+ GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ const Type *Ty, ExecutionContext &SF);
+ void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
new file mode 100644
index 000000000000..fa8bee460427
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call. This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially. For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke the parent's ctors/dtors from the generated main()!
+ // On Mingw and Cygwin, the symbol __main would otherwise resolve to the
+ // callee's (e.g. tools/lli's) copy, invoking the wrong duplicated ctors
+ // (and registering the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
+ // called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // and has an asm specifier, try again without the underscore.
+ if (Name[0] == 1 && NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
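+
+// A small sketch of the LazyFunctionCreator hook consulted just above. The
+// resolver and the "bump_counter" symbol are hypothetical; a client installs
+// the resolver with ExecutionEngine::InstallLazyFunctionCreator() before
+// running code that references otherwise unresolvable symbols.
+static int ExampleCounter = 0;
+static void ExampleBumpCounter() { ++ExampleCounter; }
+
+static void *ExampleSymbolResolver(const std::string &Name) {
+  if (Name == "bump_counter")
+    return (void*)(intptr_t)&ExampleBumpCounter;
+  return 0;    // not ours: fall back to the AbortOnFailure handling above
+}
+// Typical installation, with EE being the JIT's ExecutionEngine:
+//   EE->InstallLazyFunctionCreator(ExampleSymbolResolver);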
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 000000000000..445d2d0670c8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,821 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bitcode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#ifdef __APPLE__
+// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
+// of atexit). It passes the address of linker generated symbol __dso_handle
+// to the function.
+// This configuration change happened at version 5330.
+# include <AvailabilityMacros.h>
+# if defined(MAC_OS_X_VERSION_10_4) && \
+ ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
+ (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
+ __APPLE_CC__ >= 5330))
+# ifndef HAVE___DSO_HANDLE
+# define HAVE___DSO_HANDLE 1
+# endif
+# endif
+#endif
+
+#if HAVE___DSO_HANDLE
+extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
+#endif
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { JIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInJIT() {
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+
+// libgcc defines the __register_frame function to dynamically register new
+// dwarf frames for exception handling. This functionality is not portable
+// across compilers and is only provided by GCC. We use the __register_frame
+// function here so that code generated by the JIT cooperates with the unwinding
+// runtime of libgcc. When JITting with exception handling enabled, LLVM
+// generates dwarf frames and registers them with libgcc via __register_frame.
+//
+// The __register_frame function works with Linux.
+//
+// Unfortunately, this functionality seems to have been added to libgcc after
+// the darwin unwinding library of libgcc was written. The darwin code
+// overwrites the value updated by __register_frame with a value fetched with
+// "keymgr".
+// "keymgr" is an obsolete functionality, which should be rewritten some day.
+// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we
+// need a workaround in LLVM which uses the "keymgr" to dynamically modify the
+// values of an opaque key, used by libgcc to find dwarf tables.
+
+extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
+
+#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
+# define USE_KEYMGR 1
+#else
+# define USE_KEYMGR 0
+#endif
+
+#if USE_KEYMGR
+
+namespace {
+
+// LibgccObject - This is the structure defined in libgcc. There is no #include
+// provided for this structure, so we also define it here. libgcc calls it
+// "struct object". The structure is undocumented in libgcc.
+struct LibgccObject {
+ void *unused1;
+ void *unused2;
+ void *unused3;
+
+ /// frame - Pointer to the exception table.
+ void *frame;
+
+ /// encoding - The encoding of the object?
+ union {
+ struct {
+ unsigned long sorted : 1;
+ unsigned long from_array : 1;
+ unsigned long mixed_encoding : 1;
+ unsigned long encoding : 8;
+ unsigned long count : 21;
+ } b;
+ size_t i;
+ } encoding;
+
+ /// fde_end - libgcc defines this field only if some macro is defined. We
+ /// include this field even if it may not be there, to make libgcc happy.
+ char *fde_end;
+
+ /// next - At least we know it's a chained list!
+ struct LibgccObject *next;
+};
+
+// "kemgr" stuff. Apparently, all frame tables are stored there.
+extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
+extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
+#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
+
+/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It
+/// probably contains all dwarf tables that are loaded.
+struct LibgccObjectInfo {
+
+ /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
+ ///
+ struct LibgccObject* seenObjects;
+
+ /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+ ///
+ struct LibgccObject* unseenObjects;
+
+ unsigned unused[2];
+};
+
+/// DarwinRegisterFrame - Since __register_frame does not work with darwin's
+/// libgcc, we provide our own function, which "tricks" libgcc by modifying the
+/// "Dwarf2 object list" key.
+void DarwinRegisterFrame(void* FrameBegin) {
+ // Get the key.
+ LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+ assert(LOI && "This should be preallocated by the runtime");
+
+ // Allocate a new LibgccObject to represent this frame. Deallocation of this
+ // object may be impossible: since darwin code in libgcc was written after
+ // the ability to dynamically register frames, things may crash if we
+ // deallocate it.
+ struct LibgccObject* ob = (struct LibgccObject*)
+ malloc(sizeof(struct LibgccObject));
+
+ // Do as libgcc does for the values of the fields.
+ ob->unused1 = (void *)-1;
+ ob->unused2 = 0;
+ ob->unused3 = 0;
+ ob->frame = FrameBegin;
+ ob->encoding.i = 0;
+ ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
+
+ // Put the info in both places, as libgcc may use either the first or the
+ // second field. Note that we rely on having two pointers here; if fde_end
+ // were a plain char, things would get complicated.
+ ob->fde_end = (char*)LOI->unseenObjects;
+ ob->next = LOI->unseenObjects;
+
+ // Update the key's unseenObjects list.
+ LOI->unseenObjects = ob;
+
+ // Finally update the "key". Apparently, libgcc requires it.
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
+ LOI);
+
+}
+
+}
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine; it does not fall back to the interpreter. This takes ownership
+/// of the module.
+ExecutionEngine *JIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo()) {
+ return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ } else {
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+ }
+}
+
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ // Search for the function in every instance of the JIT.
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available: fall back on the first created JIT (it
+ // will search the symbols of the current program/library).
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+ isAlreadyCodeGenerating(false) {
+ setTargetData(TM.getTargetData());
+
+ jitstate = new JITState(M);
+
+ // Initialize JCE
+ JCE = createEmitter(*this, JMM, TM);
+
+ // Register in global list of all JITs.
+ AllJits->Add(this);
+
+ // Add target data
+ MutexGuard locked(lock);
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory that
+ // may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Register routine for informing unwinding runtime about new EH frames
+#if HAVE_EHTABLE_SUPPORT
+#if USE_KEYMGR
+ struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+
+ // The key is created on demand, and libgcc creates it the first time an
+ // exception occurs. Since we need the key to register frames, we create
+ // it now.
+ if (!LOI)
+ LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
+ InstallExceptionTableRegister(DarwinRegisterFrame);
+ // Not sure about how to deregister on Darwin.
+#else
+ InstallExceptionTableRegister(__register_frame);
+ InstallExceptionTableDeregister(__deregister_frame);
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+ // Initialize passes.
+ PM.doInitialization();
+}
+
+JIT::~JIT() {
+ // Unregister all exception tables registered by this JIT.
+ DeregisterAllTables();
+ // Cleanup.
+ AllJits->Remove(this);
+ delete jitstate;
+ delete JCE;
+ delete &TM;
+}
+
+/// addModule - Add a new Module to the JIT. If we previously removed the last
+/// Module, we need to re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
+ MutexGuard locked(lock);
+
+ if (Modules.empty()) {
+ assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
+
+ jitstate = new JITState(M);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+
+ ExecutionEngine::addModule(M);
+}
+
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+ bool result = ExecutionEngine::removeModule(M);
+
+ MutexGuard locked(lock);
+
+ if (jitstate->getModule() == M) {
+ delete jitstate;
+ jitstate = 0;
+ }
+
+ if (!jitstate && !Modules.empty()) {
+ jitstate = new JITState(Modules[0]);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ report_fatal_error("Target does not support machine code emission!");
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+ return result;
+}
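+
+// Usage sketch for the module management above (names are hypothetical):
+// extra modules can be added at any time, symbols are found across all of
+// them (see JitPool), and compilation still happens lazily on first use.
+static void *compileFromExtraModule(JIT *EE, Module *Extra) {
+  EE->addModule(Extra);                        // re-creates jitstate if needed
+  if (Function *F = EE->FindFunctionNamed("helper"))
+    return EE->getPointerToFunction(F);        // JIT-compiles on demand
+  return 0;
+}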
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue JIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ return rv;
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ // Okay, this is not one of our quick and easy cases. Because we don't have a
+ // full FFI, we have to codegen a nullary stub function that just calls the
+ // function we are interested in, passing in constants for all of the
+ // arguments. Make this function and return.
+
+ // First, create the function.
+ FunctionType *STy=FunctionType::get(RetTy, false);
+ Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
+ F->getParent());
+
+ // Insert a basic block.
+ BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
+
+ // Convert all of the GenericValue arguments over to constants. Note that we
+ // currently don't support varargs.
+ SmallVector<Value*, 8> Args;
+ for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
+ Constant *C = 0;
+ const Type *ArgTy = FTy->getParamType(i);
+ const GenericValue &AV = ArgValues[i];
+ switch (ArgTy->getTypeID()) {
+ default: llvm_unreachable("Unknown argument type for function call!");
+ case Type::IntegerTyID:
+ C = ConstantInt::get(F->getContext(), AV.IntVal);
+ break;
+ case Type::FloatTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
+ break;
+ case Type::DoubleTyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
+ break;
+ case Type::PPC_FP128TyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
+ break;
+ case Type::PointerTyID:
+ void *ArgPtr = GVTOP(AV);
+ if (sizeof(void*) == 4)
+ C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+ (int)(intptr_t)ArgPtr);
+ else
+ C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+ (intptr_t)ArgPtr);
+ // Cast the integer to pointer
+ C = ConstantExpr::getIntToPtr(C, ArgTy);
+ break;
+ }
+ Args.push_back(C);
+ }
+
+ CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
+ TheCall->setCallingConv(F->getCallingConv());
+ TheCall->setTailCall();
+ if (!TheCall->getType()->isVoidTy())
+ // Return result of the call.
+ ReturnInst::Create(F->getContext(), TheCall, StubBB);
+ else
+ ReturnInst::Create(F->getContext(), StubBB); // Just return void.
+
+ // Finally, call our nullary stub function.
+ GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
+ // Erase it, since no other function can have a reference to it.
+ Stub->eraseFromParent();
+ // And return the result.
+ return Result;
+}
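+
+// Beyond runFunction(), the usual way for a client to call JITed code
+// repeatedly is to take the entry address once and cast it. A minimal
+// sketch, assuming F has LLVM type i32 (i32) and lives in one of EE's
+// modules:
+static int callThroughFunctionPointer(JIT *EE, Function *F, int Arg) {
+  // getPointerToFunction() compiles F on first use (see below).
+  int (*FP)(int) = (int (*)(int))(intptr_t)EE->getPointerToFunction(F);
+  return FP(Arg);
+}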
+
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ EventListeners.push_back(L);
+}
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+ if (L == NULL)
+ return;
+ MutexGuard locked(lock);
+ std::vector<JITEventListener*>::reverse_iterator I=
+ std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ if (I != EventListeners.rend()) {
+ std::swap(*I, EventListeners.back());
+ EventListeners.pop_back();
+ }
+}
+void JIT::NotifyFunctionEmitted(
+ const Function &F,
+ void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
+ }
+}
+
+void JIT::NotifyFreeingMachineCode(void *OldPtr) {
+ MutexGuard locked(lock);
+ for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+ EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
+ }
+}
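+
+// A minimal JITEventListener sketch modelled on MCIListener below: it merely
+// tallies how much machine code has been emitted. A client would register it
+// with RegisterJITEventListener() above and unregister it before destroying
+// it.
+namespace {
+class CountingJITEventListener : public JITEventListener {
+  unsigned EmittedFunctions;
+  size_t EmittedBytes;
+public:
+  CountingJITEventListener() : EmittedFunctions(0), EmittedBytes(0) {}
+  virtual void NotifyFunctionEmitted(const Function &, void *, size_t Size,
+                                     const EmittedFunctionDetails &) {
+    ++EmittedFunctions;
+    EmittedBytes += Size;
+  }
+  virtual void NotifyFreeingMachineCode(void *) {}
+  unsigned getEmittedFunctions() const { return EmittedFunctions; }
+  size_t getEmittedBytes() const { return EmittedBytes; }
+};
+}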
+
+/// runJITOnFunction - Run the FunctionPassManager full of
+/// just-in-time compilation passes on F, hopefully filling in
+/// GlobalAddress[F] with the address of F's machine code.
+///
+void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
+ MutexGuard locked(lock);
+
+ class MCIListener : public JITEventListener {
+ MachineCodeInfo *const MCI;
+ public:
+ MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+ virtual void NotifyFunctionEmitted(const Function &,
+ void *Code, size_t Size,
+ const EmittedFunctionDetails &) {
+ MCI->setAddress(Code);
+ MCI->setSize(Size);
+ }
+ };
+ MCIListener MCIL(MCI);
+ if (MCI)
+ RegisterJITEventListener(&MCIL);
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ if (MCI)
+ UnregisterJITEventListener(&MCIL);
+}
+
+void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+ assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
+
+ jitTheFunction(F, locked);
+
+ // If the function referred to another function that had not yet been
+ // read from bitcode, and we are jitting non-lazily, emit it now.
+ while (!jitstate->getPendingFunctions(locked).empty()) {
+ Function *PF = jitstate->getPendingFunctions(locked).back();
+ jitstate->getPendingFunctions(locked).pop_back();
+
+ assert(!PF->hasAvailableExternallyLinkage() &&
+ "Externally-defined function should not be in pending list.");
+
+ jitTheFunction(PF, locked);
+
+ // Now that the function has been jitted, ask the JITEmitter to rewrite
+ // the stub with real address of the function.
+ updateFunctionStub(PF);
+ }
+}
+
+void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*F);
+ isAlreadyCodeGenerating = false;
+
+ // clear basic block addresses after this function is done
+ getBasicBlockAddressMap(locked).clear();
+}
+
+/// getPointerToFunction - This method is used to get the address of the
+/// specified function, compiling it if necessary.
+///
+void *JIT::getPointerToFunction(Function *F) {
+
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr; // Check if function already code gen'd
+
+ MutexGuard locked(lock);
+
+ // Now that this thread owns the lock, make sure we read in the function if it
+ // exists in this Module.
+ std::string ErrorMsg;
+ if (F->Materialize(&ErrorMsg)) {
+ report_fatal_error("Error reading function '" + F->getName()+
+ "' from bitcode file: " + ErrorMsg);
+ }
+
+ // ... and check if another thread has already code gen'd the function.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ return Addr;
+}
+
+void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I == getBasicBlockAddressMap(locked).end()) {
+ getBasicBlockAddressMap(locked)[BB] = Addr;
+ } else {
+ // Ignore repeats: a basic block may be split into several machine basic
+ // blocks, so the same BB address can be reported more than once.
+ }
+}
+
+void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
+ MutexGuard locked(lock);
+ getBasicBlockAddressMap(locked).erase(BB);
+}
+
+void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
+ // Make sure its parent function has been compiled by the JIT.
+ (void)getPointerToFunction(BB->getParent());
+
+ // resolve basic block address
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I != getBasicBlockAddressMap(locked).end()) {
+ return I->second;
+ } else {
+ assert(0 && "JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
+ return 0;
+ }
+}
+
+/// getOrEmitGlobalVariable - Return the address of the specified global
+/// variable, possibly emitting it to memory if needed. This is used by the
+/// Emitter.
+void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
+ MutexGuard locked(lock);
+
+ void *Ptr = getPointerToGlobalIfAvailable(GV);
+ if (Ptr) return Ptr;
+
+ // If the global is external, just remember the address.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
+#if HAVE___DSO_HANDLE
+ if (GV->getName() == "__dso_handle")
+ return (void*)&__dso_handle;
+#endif
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
+ if (Ptr == 0) {
+ report_fatal_error("Could not resolve external global address: "
+ +GV->getName());
+ }
+ addGlobalMapping(GV, Ptr);
+ } else {
+ // If the global hasn't been emitted to memory yet, allocate space and
+ // emit it into memory.
+ Ptr = getMemoryForGV(GV);
+ addGlobalMapping(GV, Ptr);
+ EmitGlobalVariable(GV); // Initialize the variable.
+ }
+ return Ptr;
+}
+
+/// recompileAndRelinkFunction - This method is used to force a function
+/// which has already been compiled, to be compiled again, possibly
+/// after it has been modified. Then the entry to the old copy is overwritten
+/// with a branch to the new copy. If there was no old copy, this acts
+/// just like JIT::getPointerToFunction().
+///
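+/// Illustrative sketch (not from the original sources): after mutating F's
+/// IR in place, a client might call
+///
+///   void *NewAddr = EE->recompileAndRelinkFunction(F);
+///
+/// and callers holding the old address still end up in the new code, because
+/// the old entry point is patched with a branch to NewAddr.
+///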
+void *JIT::recompileAndRelinkFunction(Function *F) {
+ void *OldAddr = getPointerToGlobalIfAvailable(F);
+
+ // If it's not already compiled there is no reason to patch it up.
+ if (OldAddr == 0) { return getPointerToFunction(F); }
+
+ // Delete the old function mapping.
+ addGlobalMapping(F, 0);
+
+ // Recodegen the function
+ runJITOnFunction(F);
+
+ // Update state, forward the old function to the new function.
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ TJI.replaceMachineCodeForFunction(OldAddr, Addr);
+ return Addr;
+}
+
+/// getMemoryForGV - This method abstracts memory allocation of global
+/// variable so that the JIT can allocate thread local variables depending
+/// on the target.
+///
+char* JIT::getMemoryForGV(const GlobalVariable* GV) {
+ char *Ptr;
+
+ // GlobalVariables which are not "constant" will cause trouble in a server
+ // situation: their memory is returned in the same block as code, which may
+ // not be writable.
+ if (isGVCompilationDisabled() && !GV->isConstant()) {
+ report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
+ }
+
+ // Some applications require globals and code to live together, so they may
+ // be allocated into the same buffer, but in general globals are allocated
+ // through the memory manager which puts them near the code but not in the
+ // same buffer.
+ const Type *GlobalType = GV->getType()->getElementType();
+ size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+ size_t A = getTargetData()->getPreferredAlignment(GV);
+ if (GV->isThreadLocal()) {
+ MutexGuard locked(lock);
+ Ptr = TJI.allocateThreadLocalMemory(S);
+ } else if (TJI.allocateSeparateGVMemory()) {
+ if (A <= 8) {
+ Ptr = (char*)malloc(S);
+ } else {
+ // Allocate S+A bytes of memory, then use an aligned pointer within that
+ // space.
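+ // Worked example (illustrative): with S = 24 and A = 16, malloc(40) might
+ // return ...1008; MisAligned is then 8, so the pointer is advanced by
+ // A - 8 = 8 bytes to the 16-byte-aligned address ...1010.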
+ Ptr = (char*)malloc(S+A);
+ unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+ Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+ }
+ } else if (AllocateGVsWithCode) {
+ Ptr = (char*)JCE->allocateSpace(S, A);
+ } else {
+ Ptr = (char*)JCE->allocateGlobal(S, A);
+ }
+ return Ptr;
+}
+
+void JIT::addPendingFunction(Function *F) {
+ MutexGuard locked(lock);
+ jitstate->getPendingFunctions(locked).push_back(F);
+}
+
+
+JITEventListener::~JITEventListener() {}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
new file mode 100644
index 000000000000..b879fc36e59b
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h
@@ -0,0 +1,226 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_H
+#define JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+class Function;
+struct JITEvent_EmittedFunctionDetails;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
+
+class JITState {
+private:
+ FunctionPassManager PM; // Passes to compile a function
+ Module *M; // Module used to create the PM
+
+ /// PendingFunctions - Functions which have not been code generated yet, but
+ /// were called from a function being code generated.
+ std::vector<AssertingVH<Function> > PendingFunctions;
+
+public:
+ explicit JITState(Module *M) : PM(M), M(M) {}
+
+ FunctionPassManager &getPM(const MutexGuard &L) {
+ return PM;
+ }
+
+ Module *getModule() const { return M; }
+ std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
+ return PendingFunctions;
+ }
+};
+
+
+class JIT : public ExecutionEngine {
+ /// types
+ typedef ValueMap<const BasicBlock *, void *>
+ BasicBlockAddressMapTy;
+ /// data
+ TargetMachine &TM; // The current target we are compiling to
+ TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
+ JITCodeEmitter *JCE; // JCE object
+ std::vector<JITEventListener*> EventListeners;
+
+ /// AllocateGVsWithCode - Some applications require that global variables and
+ /// code be allocated into the same region of memory, in which case this flag
+ /// should be set to true. Doing so breaks freeMachineCodeForFunction.
+ bool AllocateGVsWithCode;
+
+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
+ JITState *jitstate;
+
+ /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+ /// actualized version, only filled for basic blocks that have their address
+ /// taken.
+ BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
+public:
+ ~JIT();
+
+ static void Register() {
+ JITCtor = createJIT;
+ }
+
+ /// getJITInfo - Return the target JIT information structure.
+ ///
+ TargetJITInfo &getJITInfo() const { return TJI; }
+
+ /// create - Create and return a new JIT compiler if there is one available
+ /// for the current target. Otherwise, return null.
+ ///
+ static ExecutionEngine *create(Module *M,
+ std::string *Err,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel =
+ CodeGenOpt::Default,
+ bool GVsWithCode = true,
+ CodeModel::Model CMM = CodeModel::Default) {
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
+ CMM);
+ }
+
+ virtual void addModule(Module *M);
+
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ virtual bool removeModule(Module *M);
+
+ /// runFunction - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ // CompilationCallback - Invoked the first time that a call site is found,
+ // which causes lazy compilation of the target function.
+ //
+ static void CompilationCallback();
+
+ /// getPointerToFunction - This returns the address of the specified function,
+ /// compiling it if necessary.
+ ///
+ void *getPointerToFunction(Function *F);
+
+ /// addPointerToBasicBlock - Adds the address of the specified basic block.
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+ /// clearPointerToBasicBlock - Removes the address of the specified basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB);
+
+ /// getPointerToBasicBlock - This returns the address of the specified basic
+ /// block, assuming the function has already been compiled.
+ void *getPointerToBasicBlock(BasicBlock *BB);
+
+ /// getOrEmitGlobalVariable - Return the address of the specified global
+ /// variable, possibly emitting it to memory if needed. This is used by the
+ /// Emitter.
+ void *getOrEmitGlobalVariable(const GlobalVariable *GV);
+
+ /// getPointerToFunctionOrStub - If the specified function has been
+ /// code-gen'd, return a pointer to the function. If not, compile it, or use
+ /// a stub to implement lazy compilation if available.
+ ///
+ void *getPointerToFunctionOrStub(Function *F);
+
+ /// recompileAndRelinkFunction - This method is used to force a function
+ /// which has already been compiled, to be compiled again, possibly
+ /// after it has been modified. Then the entry to the old copy is overwritten
+ /// with a branch to the new copy. If there was no old copy, this acts
+ /// just like JIT::getPointerToFunction().
+ ///
+ void *recompileAndRelinkFunction(Function *F);
+
+ /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+ /// Function.
+ ///
+ void freeMachineCodeForFunction(Function *F);
+
+ /// addPendingFunction - While JITing non-lazily, a called but non-codegen'd
+ /// function was encountered. Add it to a pending list to be processed after
+ /// the current function.
+ ///
+ void addPendingFunction(Function *F);
+
+ /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ ///
+ JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // Run the JIT on F and return information about the generated code
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
+
+ virtual void RegisterJITEventListener(JITEventListener *L);
+ virtual void UnregisterJITEventListener(JITEventListener *L);
+ /// These functions correspond to the methods on JITEventListener. They
+ /// iterate over the registered listeners and call the corresponding method on
+ /// each.
+ void NotifyFunctionEmitted(
+ const Function &F, void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details);
+ void NotifyFreeingMachineCode(void *OldPtr);
+
+ BasicBlockAddressMapTy &
+ getBasicBlockAddressMap(const MutexGuard &) {
+ return BasicBlockAddressMap;
+ }
+
+
+private:
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+ TargetMachine &tm);
+ void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
+ void updateFunctionStub(Function *F);
+ void jitTheFunction(Function *F, const MutexGuard &locked);
+
+protected:
+
+ /// getMemoryForGV - Allocate memory for a global variable.
+ virtual char* getMemoryForGV(const GlobalVariable* GV);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
new file mode 100644
index 000000000000..e71c20b89fda
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -0,0 +1,211 @@
+//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDebugRegisterer.h"
+#include "../../CodeGen/ELF.h"
+#include "../../CodeGen/ELFWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <string>
+
+namespace llvm {
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+ // Debuggers put a breakpoint in this function.
+ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
+
+ // We put information about the JITed function in this global, which the
+ // debugger reads. Make sure to specify the version statically, because the
+ // debugger checks the version before we can set it during runtime.
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+}
+
+namespace {
+
+ /// JITDebugLock - Used to serialize all code registration events, since they
+ /// modify global variables.
+ sys::Mutex JITDebugLock;
+
+}
+
+JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
+
+JITDebugRegisterer::~JITDebugRegisterer() {
+ // Free all ELF memory.
+ for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
+ I != E; ++I) {
+ // Call the private method that doesn't update the map so our iterator
+ // doesn't break.
+ UnregisterFunctionInternal(I);
+ }
+ FnMap.clear();
+}
+
+std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
+ // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
+ // API. We don't use the real module because then the ELFWriter would write
+ // out unnecessary GlobalValues during finalization.
+ LLVMContext Context;
+ Module M("", Context);
+
+ // Make a buffer for the ELF in memory.
+ std::string Buffer;
+ raw_string_ostream O(Buffer);
+ ELFWriter EW(O, TM);
+ EW.doInitialization(M);
+
+ // Copy the binary into the .text section. This isn't necessary, but it's
+ // useful to be able to disassemble the ELF by hand.
+ ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
+ Text.Addr = (uint64_t)I.FnStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ Text.getData().assign(I.FnStart, I.FnEnd);
+
+ // Copy the exception handling call frame information into the .eh_frame
+ // section. This allows GDB to get a good stack trace, particularly on
+ // linux x86_64. Mark this as a PROGBITS section that needs to be loaded
+ // into memory at runtime.
+ ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+ // Pointers in the DWARF EH info are all relative to the EH frame start,
+ // which is stored here.
+ EH.Addr = (uint64_t)I.EhStart;
+ // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+ // instead of a vector.
+ EH.getData().assign(I.EhStart, I.EhEnd);
+
+ // Add this single function to the symbol table, so the debugger prints the
+ // name instead of '???'. We give the symbol default global visibility.
+ ELFSym *FnSym = ELFSym::getGV(F,
+ ELF::STB_GLOBAL,
+ ELF::STT_FUNC,
+ ELF::STV_DEFAULT);
+ FnSym->SectionIdx = Text.SectionIdx;
+ FnSym->Size = I.FnEnd - I.FnStart;
+ FnSym->Value = 0; // Offset from start of section.
+ EW.SymbolList.push_back(FnSym);
+
+ EW.doFinalization(M);
+ O.flush();
+
+ // When trying to debug why GDB isn't getting the debug info right, it's
+ // awfully helpful to write the object file to disk so that it can be
+ // inspected with readelf and objdump.
+ if (JITEmitDebugInfoToDisk) {
+ std::string Filename;
+ raw_string_ostream O2(Filename);
+ O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
+ O2.flush();
+ std::string Errors;
+ raw_fd_ostream O3(Filename.c_str(), Errors);
+ O3 << Buffer;
+ O3.close();
+ }
+
+ return Buffer;
+}
+
+void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ std::string Buffer = MakeELF(F, I);
+
+ jit_code_entry *JITCodeEntry = new jit_code_entry();
+ JITCodeEntry->symfile_addr = Buffer.c_str();
+ JITCodeEntry->symfile_size = Buffer.size();
+
+ // Add a mapping from F to the entry and buffer, so we can delete this
+ // info later.
+ FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
+
+ // Acquire the lock and do the registration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+ // Insert this entry at the head of the list.
+ JITCodeEntry->prev_entry = NULL;
+ jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
+ JITCodeEntry->next_entry = NextEntry;
+ if (NextEntry != NULL) {
+ NextEntry->prev_entry = JITCodeEntry;
+ }
+ __jit_debug_descriptor.first_entry = JITCodeEntry;
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+}
+
+void JITDebugRegisterer::UnregisterFunctionInternal(
+ RegisteredFunctionsMap::iterator I) {
+ jit_code_entry *&JITCodeEntry = I->second.second;
+
+ // Acquire the lock and do the unregistration.
+ {
+ MutexGuard locked(JITDebugLock);
+ __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+ // Remove the jit_code_entry from the linked list.
+ jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
+ jit_code_entry *NextEntry = JITCodeEntry->next_entry;
+ if (NextEntry) {
+ NextEntry->prev_entry = PrevEntry;
+ }
+ if (PrevEntry) {
+ PrevEntry->next_entry = NextEntry;
+ } else {
+ assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+ __jit_debug_descriptor.first_entry = NextEntry;
+ }
+
+ // Tell GDB which entry we removed, and unregister the code.
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+
+ delete JITCodeEntry;
+ JITCodeEntry = NULL;
+
+ // Free the ELF file in memory.
+ std::string &Buffer = I->second.first;
+ Buffer.clear();
+}
+
+void JITDebugRegisterer::UnregisterFunction(const Function *F) {
+ // TODO: Support non-ELF platforms.
+ if (!TM.getELFWriterInfo())
+ return;
+
+ RegisteredFunctionsMap::iterator I = FnMap.find(F);
+ if (I == FnMap.end()) return;
+ UnregisterFunctionInternal(I);
+ FnMap.erase(I);
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
new file mode 100644
index 000000000000..dce506bbfefd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -0,0 +1,116 @@
+//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+ typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+ } jit_actions_t;
+
+ struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+ };
+
+ struct jit_descriptor {
+ uint32_t version;
+ // This should be jit_actions_t, but we want to be specific about the
+ // bit-width.
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+ };
+
+}
+
+namespace llvm {
+
+class ELFSection;
+class Function;
+class TargetMachine;
+
+
+/// This class encapsulates information we want to send to the debugger.
+///
+struct DebugInfo {
+ uint8_t *FnStart;
+ uint8_t *FnEnd;
+ uint8_t *EhStart;
+ uint8_t *EhEnd;
+
+ DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
+};
+
+typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
+ RegisteredFunctionsMap;
+
+/// This class registers debug info for JITed code with an attached debugger.
+/// Without proper debug info, GDB can't do things like source level debugging
+/// or even produce a proper stack trace on linux-x86_64. To use this class,
+/// whenever a function is JITed, create a DebugInfo struct and pass it to the
+/// RegisterFunction method. The method will then do whatever is necessary to
+/// inform the debugger about the JITed function.
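+///
+/// A minimal usage sketch (illustrative only; in-tree, the JIT's emitter is
+/// expected to perform these steps automatically):
+///
+///   JITDebugRegisterer DR(TM);                // TM: the active TargetMachine
+///   DebugInfo DI;
+///   DI.FnStart = FnStart; DI.FnEnd = FnEnd;   // bounds of the machine code
+///   DI.EhStart = EhStart; DI.EhEnd = EhEnd;   // bounds of the .eh_frame data
+///   DR.RegisterFunction(F, DI);               // F: the JITed llvm::Function*
+///   ...
+///   DR.UnregisterFunction(F);                 // before freeing the code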
+class JITDebugRegisterer {
+
+ TargetMachine &TM;
+
+ /// FnMap - A map of functions that have been registered to the associated
+ /// temporary files. Used for cleanup.
+ RegisteredFunctionsMap FnMap;
+
+ /// MakeELF - Builds the ELF file in memory and returns a std::string that
+ /// contains the ELF.
+ std::string MakeELF(const Function *F, DebugInfo &I);
+
+public:
+ JITDebugRegisterer(TargetMachine &tm);
+
+ /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
+ ///
+ ~JITDebugRegisterer();
+
+ /// RegisterFunction - Register debug info for the given function with an
+ /// attached debugger. Clients must call UnregisterFunction on all
+ /// registered functions before deleting them to free the associated symbol
+ /// file and unregister it from the debugger.
+ void RegisterFunction(const Function *F, DebugInfo &I);
+
+ /// UnregisterFunction - Unregister the debug info for the given function
+ /// from the debugger and free associated memory.
+ void UnregisterFunction(const Function *F);
+
+private:
+ /// UnregisterFunctionInternal - Unregister the debug info for the given
+ /// function from the debugger and delete any temporary files. The private
+ /// version of this method does not remove the function from FnMap so that it
+ /// can be called while iterating over FnMap.
+ void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
new file mode 100644
index 000000000000..ddb0d5478596
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -0,0 +1,598 @@
+//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
+
+
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& jce,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr) {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ const TargetMachine& TM = F.getTarget();
+ TD = TM.getTargetData();
+ stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
+ RI = TM.getRegisterInfo();
+ TFI = TM.getFrameLowering();
+ JCE = &jce;
+
+ unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
+ EndFunction);
+
+ unsigned char* Result = 0;
+
+ const std::vector<const Function *> Personalities = MMI->getPersonalities();
+ EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
+
+ Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
+ StartFunction, EndFunction, ExceptionTable);
+
+ return Result;
+}
+
+
+void
+JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ MCSymbol *BaseLabel = 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ MCSymbol *Label = Move.getLabel();
+
+ // Throw out move if the label is invalid.
+ if (Label && (*JCE->getLabelLocations())[Label] == 0)
+ continue;
+
+ intptr_t LabelPtr = 0;
+ if (Label) LabelPtr = JCE->getLabelAddress(Label);
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabelPtr && Label && BaseLabel != Label) {
+ JCE->emitByte(dwarf::DW_CFA_advance_loc4);
+ JCE->emitInt32(LabelPtr - BaseLabelPtr);
+
+ BaseLabel = Label;
+ BaseLabelPtr = LabelPtr;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
+ }
+
+ JCE->emitULEB128Bytes(-Src.getOffset());
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ llvm_unreachable("Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitSLEB128Bytes(Offset);
+ } else if (Reg < 64) {
+ JCE->emitByte(dwarf::DW_CFA_offset + Reg);
+ JCE->emitULEB128Bytes(Offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitULEB128Bytes(Offset);
+ }
+ }
+ }
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
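+/// For example (illustrative): TypeIds {1, 2, 3} and {1, 2, 4} share their
+/// first two entries, so this returns 2; identical lists return their common
+/// length.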
+static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+
+/// PadLT - Order landing pads lexicographically by type id.
+static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+namespace {
+
+/// ActionEntry - Structure describing an entry in the actions table.
+struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+};
+
+/// PadRange - Structure holding a try-range and the associated landing pad.
+struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+};
+
+typedef DenseMap<MCSymbol*, PadRange> RangeMapType;
+
+/// CallSiteEntry - Structure describing an entry in the call-site table.
+struct CallSiteEntry {
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+};
+
+}
+
+unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const {
+ assert(MMI && "MachineModuleInfo not registered!");
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads(JCE->getLabelLocations());
+
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return 0;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding which outputs one byte per
+ // entry as long as the value written is not too large, but can differ.
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
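+ // Worked example (illustrative): with FilterIds = {5, 300}, the first id
+ // encodes in one ULEB128 byte and the second in two, so FilterOffsets ends
+ // up as {-1, -2} and the running Offset finishes at -4.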
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the total size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. Entries must be ordered by address.
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ bool MayThrow = false;
+ MCSymbol *LastLabel = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ MayThrow |= MI->getDesc().isCall();
+ continue;
+ }
+
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ assert(BeginLabel && "Invalid label!");
+
+ if (BeginLabel == LastLabel)
+ MayThrow = false;
+
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
+
+ assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+ "Invalid landing pad!");
+
+ // Try to merge with the previous call-site.
+ if (CallSites.size()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ }
+ }
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+ unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t)); // Landing pad.
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ // Call-site table length
+ MCAsmInfo::getULEB128Size(SizeSites) +
+ SizeSites + SizeActions + SizeTypes;
+
+ // Begin the exception table.
+ JCE->emitAlignmentWithFill(4, 0);
+ // Asm->EOL("Padding");
+
+ unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
+
+ // Emit the header.
+ JCE->emitByte(dwarf::DW_EH_PE_omit);
+ // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ JCE->emitByte(dwarf::DW_EH_PE_absptr);
+ // Asm->EOL("TType format (DW_EH_PE_absptr)");
+ JCE->emitULEB128Bytes(TypeOffset);
+ // Asm->EOL("TType base offset");
+ JCE->emitByte(dwarf::DW_EH_PE_udata4);
+ // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ JCE->emitULEB128Bytes(SizeSites);
+ // Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ intptr_t BeginLabelPtr = 0;
+ intptr_t EndLabelPtr = 0;
+
+ if (!S.BeginLabel) {
+ BeginLabelPtr = (intptr_t)StartFunction;
+ JCE->emitInt32(0);
+ } else {
+ BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel);
+ JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction);
+ }
+
+ // Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EndLabelPtr = (intptr_t)EndFunction;
+ else
+ EndLabelPtr = JCE->getLabelAddress(S.EndLabel);
+
+ JCE->emitInt32(EndLabelPtr - BeginLabelPtr);
+ //Asm->EOL("Region length");
+
+ if (!S.PadLabel) {
+ JCE->emitInt32(0);
+ } else {
+ unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel);
+ JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction);
+ }
+ // Asm->EOL("Landing pad");
+
+ JCE->emitULEB128Bytes(S.Action);
+ // Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ JCE->emitSLEB128Bytes(Action.ValueForTypeID);
+ //Asm->EOL("TypeInfo index");
+ JCE->emitSLEB128Bytes(Action.NextAction);
+ //Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ const GlobalVariable *GV = TypeInfos[M - 1];
+
+ if (GV) {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ else
+ JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ } else {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32(0);
+ else
+ JCE->emitInt64(0);
+ }
+ // Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ JCE->emitULEB128Bytes(TypeID);
+ //Asm->EOL("Filter TypeInfo index");
+ }
+
+ JCE->emitAlignmentWithFill(4, 0);
+
+ return DwarfExceptionTable;
+}
+
+unsigned char*
+JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+ PointerSize : -PointerSize;
+
+ unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // EH Common Frame header
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->emitInt32((int)0);
+ JCE->emitByte(dwarf::DW_CIE_VERSION);
+ JCE->emitString(Personality ? "zPLR" : "zR");
+ JCE->emitULEB128Bytes(1);
+ JCE->emitSLEB128Bytes(stackGrowth);
+ JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Personality) {
+ // Augmentation Size: 3 small ULEBs of one byte each, plus the personality
+ // function pointer, whose size is PointerSize.
+ JCE->emitULEB128Bytes(3 + PointerSize);
+
+ // We set the encoding of the personality as direct encoding because we use
+ // the function pointer. The encoding is not relative because the current
+ // PC value may be bigger than the personality function pointer.
+ if (PointerSize == 4) {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata4);
+ JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ } else {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata8);
+ JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ }
+
+ // LSDA encoding: This must match the encoding used in EmitEHFrame ()
+ if (PointerSize == 4)
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ else
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ } else {
+ JCE->emitULEB128Bytes(1);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ }
+
+ std::vector<MachineMove> Moves;
+ TFI->getInitialFrameState(Moves);
+ EmitFrameMoves(0, Moves);
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ FrameCommonBeginPtr));
+
+ return StartCommonPtr;
+}
+
+
+unsigned char*
+JITDwarfEmitter::EmitEHFrame(const Function* Personality,
+ unsigned char* StartCommonPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const {
+ unsigned PointerSize = TD->getPointerSize();
+
+ // EH frame header.
+ unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // FDE CIE Offset
+ JCE->emitInt32(FrameBeginPtr - StartCommonPtr);
+ JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue());
+ JCE->emitInt32(EndFunction - StartFunction);
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (Personality) {
+ JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
+
+ if (PointerSize == 4) {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt32((int)0);
+ } else {
+ if (!MMI->getLandingPads().empty())
+ JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+ else
+ JCE->emitInt64((int)0);
+ }
+ } else {
+ JCE->emitULEB128Bytes(0);
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
+
+ JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+ // Indicate the size of the table
+ JCE->emitInt32At((uintptr_t*)StartEHPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ StartEHPtr));
+
+ // Double zeroes for the unwind runtime
+ if (PointerSize == 8) {
+ JCE->emitInt64(0);
+ JCE->emitInt64(0);
+ } else {
+ JCE->emitInt32(0);
+ JCE->emitInt32(0);
+ }
+
+ return StartEHPtr;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
new file mode 100644
index 000000000000..e1d00454d8d2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -0,0 +1,73 @@
+//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+
+namespace llvm {
+
+class Function;
+class JITCodeEmitter;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineMove;
+class TargetData;
+class TargetFrameLowering;
+class TargetMachine;
+class TargetRegisterInfo;
+
+class JITDwarfEmitter {
+ const TargetData* TD;
+ JITCodeEmitter* JCE;
+ const TargetRegisterInfo* RI;
+ const TargetFrameLowering *TFI;
+ MachineModuleInfo* MMI;
+ JIT& Jit;
+ bool stackGrowthDirection;
+
+ unsigned char* EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const;
+
+ void EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const;
+
+ unsigned char* EmitCommonEHFrame(const Function* Personality) const;
+
+ unsigned char* EmitEHFrame(const Function* Personality,
+ unsigned char* StartBufferPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const;
+
+public:
+
+ JITDwarfEmitter(JIT& jit);
+
+ unsigned char* EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& JCE,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* &EHFramePtr);
+
+
+ void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 000000000000..d046b8aea641
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,1308 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineCodeEmitter object that is used by the JIT to
+// write machine code to memory and remember where relocatable values are.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "JIT.h"
+#include "JITDebugRegisterer.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
+#include <algorithm>
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumBytes, "Number of bytes of machine code compiled");
+STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
+
+
+// A declaration may stop being a declaration once it's fully read from bitcode.
+// This function returns true if F is fully read and is still a declaration.
+static bool isNonGhostDeclaration(const Function *F) {
+ return F->isDeclaration() && !F->isMaterializable();
+}
+
+//===----------------------------------------------------------------------===//
+// JIT lazy compilation code.
+//
+namespace {
+ class JITEmitter;
+ class JITResolverState;
+
+ template<typename ValueTy>
+ struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
+ typedef JITResolverState *ExtraData;
+ static void onRAUW(JITResolverState *, Value *Old, Value *New) {
+ assert(false && "The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+ }
+ };
+
+ struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
+ typedef JITResolverState *ExtraData;
+ static void onDelete(JITResolverState *JRS, Function *F);
+ };
+
+ class JITResolverState {
+ public:
+ typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
+ FunctionToLazyStubMapTy;
+ typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
+ typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
+ CallSiteValueMapConfig> FunctionToCallSitesMapTy;
+ typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
+ private:
+ /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
+ /// particular function so that we can reuse them if necessary.
+ FunctionToLazyStubMapTy FunctionToLazyStubMap;
+
+ /// CallSiteToFunctionMap - Keep track of the function that each lazy call
+ /// site corresponds to, and vice versa.
+ CallSiteToFunctionMapTy CallSiteToFunctionMap;
+ FunctionToCallSitesMapTy FunctionToCallSitesMap;
+
+ /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
+ /// particular GlobalVariable so that we can reuse them if necessary.
+ GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+
+ /// Instance of the JIT this ResolverState serves.
+ JIT *TheJIT;
+
+ public:
+ JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
+ FunctionToCallSitesMap(this),
+ TheJIT(jit) {}
+
+ FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
+ const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return FunctionToLazyStubMap;
+ }
+
+ GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) {
+ assert(lck.holds(TheJIT->lock));
+ return GlobalToIndirectSymMap;
+ }
+
+ std::pair<void *, Function *> LookupFunctionFromCallSite(
+ const MutexGuard &locked, void *CallSite) const {
+ assert(locked.holds(TheJIT->lock));
+
+ // The address given to us for the stub may not be exactly right; it
+ // might be a little bit after the stub. As such, use upper_bound to
+ // find it.
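+ // Worked example (illustrative): if call sites were recorded at 0x1000 and
+ // 0x2000 and CallSite is 0x1006, upper_bound returns the 0x2000 entry;
+ // stepping back one entry yields the 0x1000 record that owns this call site.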
+ CallSiteToFunctionMapTy::const_iterator I =
+ CallSiteToFunctionMap.upper_bound(CallSite);
+ assert(I != CallSiteToFunctionMap.begin() &&
+ "This is not a known call site!");
+ --I;
+ return *I;
+ }
+
+ void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
+ assert(locked.holds(TheJIT->lock));
+
+ bool Inserted = CallSiteToFunctionMap.insert(
+ std::make_pair(CallSite, F)).second;
+ (void)Inserted;
+ assert(Inserted && "Pair was already in CallSiteToFunctionMap");
+ FunctionToCallSitesMap[F].insert(CallSite);
+ }
+
+ void EraseAllCallSitesForPrelocked(Function *F);
+
+ // Erases _all_ call sites regardless of their function. This is used to
+ // unregister the stub addresses from the StubToResolverMap in
+ // ~JITResolver().
+ void EraseAllCallSitesPrelocked();
+ };
+
+ /// JITResolver - Keep track of, and resolve, call sites for functions that
+ /// have not yet been compiled.
+ class JITResolver {
+ typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
+ typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
+ typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
+
+ /// LazyResolverFn - The target lazy resolver function that we actually
+ /// rewrite instructions to use.
+ TargetJITInfo::LazyResolverFn LazyResolverFn;
+
+ JITResolverState state;
+
+ /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
+ /// for external functions. TODO: Of course, external functions don't need
+ /// a lazy stub. It's actually here to make it more likely that far calls
+ /// succeed, but no single stub can guarantee that. I'll remove this in a
+ /// subsequent checkin when I actually fix far calls.
+ std::map<void*, void*> ExternalFnToStubMap;
+
+ /// revGOTMap - map addresses to indexes in the GOT
+ std::map<void*, unsigned> revGOTMap;
+ unsigned nextGOTIndex;
+
+ JITEmitter &JE;
+
+ /// Instance of JIT corresponding to this Resolver.
+ JIT *TheJIT;
+
+ public:
+ explicit JITResolver(JIT &jit, JITEmitter &je)
+ : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
+ LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
+ }
+
+ ~JITResolver();
+
+ /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
+ /// lazy-compilation stub if it has already been created.
+ void *getLazyFunctionStubIfAvailable(Function *F);
+
+ /// getLazyFunctionStub - This returns a pointer to a function's
+ /// lazy-compilation stub, creating one on demand as needed.
+ void *getLazyFunctionStub(Function *F);
+
+ /// getExternalFunctionStub - Return a stub for the function at the
+ /// specified address, created lazily on demand.
+ void *getExternalFunctionStub(void *FnAddr);
+
+ /// getGlobalValueIndirectSym - Return an indirect symbol containing the
+ /// specified GV address.
+ void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
+
+ /// getGOTIndexForAddress - Return a new or existing index in the GOT for
+ /// an address. This function only manages slots, it does not manage the
+ /// contents of the slots or the memory associated with the GOT.
+ unsigned getGOTIndexForAddr(void *addr);
+
+ /// JITCompilerFn - This function is called to resolve a stub to a compiled
+ /// address. If the LLVM Function corresponding to the stub has not yet
+ /// been compiled, this function compiles it first.
+ static void *JITCompilerFn(void *Stub);
+ };
+
+ class StubToResolverMapTy {
+ /// Map a stub address to a specific instance of a JITResolver so that
+ /// lazily-compiled functions can find the right resolver to use.
+ ///
+ /// Guarded by Lock.
+ std::map<void*, JITResolver*> Map;
+
+ /// Guards Map from concurrent accesses.
+ mutable sys::Mutex Lock;
+
+ public:
+ /// Registers a Stub to be resolved by Resolver.
+ void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
+ MutexGuard guard(Lock);
+ Map.insert(std::make_pair(Stub, Resolver));
+ }
+ /// Unregisters the Stub when it's invalidated.
+ void UnregisterStubResolver(void *Stub) {
+ MutexGuard guard(Lock);
+ Map.erase(Stub);
+ }
+ /// Returns the JITResolver instance that owns the Stub.
+ JITResolver *getResolverFromStub(void *Stub) const {
+ MutexGuard guard(Lock);
+ // The address given to us for the stub may not be exactly right; it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ // This is the same trick as in LookupFunctionFromCallSite from
+ // JITResolverState.
+ std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
+ assert(I != Map.begin() && "This is not a known stub!");
+ --I;
+ return I->second;
+ }
+ /// True if any stubs refer to the given resolver. Only used in an assert().
+ /// O(N)
+ bool ResolverHasStubs(JITResolver* Resolver) const {
+ MutexGuard guard(Lock);
+ for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ if (I->second == Resolver)
+ return true;
+ }
+ return false;
+ }
+ };
+ /// This needs to be static so that a lazy call stub can access it with no
+ /// context except the address of the stub.
+ ManagedStatic<StubToResolverMapTy> StubToResolverMap;
+
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// DR - The debug registerer for the jit.
+ OwningPtr<JITDebugRegisterer> DR;
+
+ /// LabelLocations - This vector is a mapping from Label ID's to their
+ /// address.
+ DenseMap<MCSymbol*, uintptr_t> LabelLocations;
+
+ /// MMI - Machine module info for exception information.
+ MachineModuleInfo* MMI;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
+ struct EmittedCode {
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
+ void *ExceptionTable;
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
+ };
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
+
+ DebugLoc PrevDL;
+
+ /// Instance of the JIT
+ JIT *TheJIT;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ EmittedFunctions(this), TheJIT(&jit) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DEBUG(dbgs() << "JIT is managing a GOT\n");
+ }
+
+ if (JITExceptionHandling || JITEmitDebugInfo) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ if (JITEmitDebugInfo) {
+ DR.reset(new JITDebugRegisterer(TM));
+ }
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ }
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ void startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment = 1);
+ void startGVStub(void *Buffer, unsigned StubSize);
+ void finishGVStub();
+ virtual void *allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment);
+
+ /// allocateSpace - Reserves space in the current block if any, or
+ /// allocates a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ if (MBB->hasAddressTaken())
+ TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
+ (void*)getCurrentPCValue());
+ DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F);
+
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+ virtual void emitLabel(MCSymbol *Label) {
+ LabelLocations[Label] = getCurrentPCValue();
+ }
+
+ virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
+ return &LabelLocations;
+ }
+
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert(LabelLocations.count(Label) && "Label not emitted!");
+ return LabelLocations.find(Label)->second;
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (DE.get()) DE->setModuleInfo(Info);
+ }
+
+ private:
+ void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+ bool MayNeedFarStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+ };
+}
+
+void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
+ JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+ FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+ if (F2C == FunctionToCallSitesMap.end())
+ return;
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+ E = F2C->second.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(*I);
+ bool Erased = CallSiteToFunctionMap.erase(*I);
+ (void)Erased;
+ assert(Erased && "Missing call site->function mapping");
+ }
+ FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (CallSiteToFunctionMapTy::const_iterator
+ I = CallSiteToFunctionMap.begin(),
+ E = CallSiteToFunctionMap.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(I->first);
+ }
+ CallSiteToFunctionMap.clear();
+ FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+ // No need to lock because we're in the destructor, and state isn't shared.
+ state.EraseAllCallSitesPrelocked();
+ assert(!StubToResolverMap->ResolverHasStubs(this) &&
+ "Resolver destroyed with stubs still alive.");
+}
+
+/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
+/// if it has already been created.
+void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this function, recycle it.
+ return state.getFunctionToLazyStubMap(locked).lookup(F);
+}
+
+/// getLazyFunctionStub - This returns a pointer to a function's
+/// lazy-compilation stub, creating one on demand as needed.
+void *JITResolver::getLazyFunctionStub(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a lazy stub for this function, recycle it.
+ void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
+ if (Stub) return Stub;
+
+ // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
+ // must resolve the symbol now.
+ void *Actual = TheJIT->isCompilingLazily()
+ ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+
+ // If this is an external declaration, attempt to resolve the address now
+ // to place in the stub.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) {
+ Actual = TheJIT->getPointerToFunction(F);
+
+ // If we resolved the symbol to a null address (e.g. a weak external),
+ // don't emit a stub. Return a null pointer to the application.
+ if (!Actual) return 0;
+ }
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(F, SL.Size, SL.Alignment);
+ // Codegen a new stub, calling the lazy resolver or the actual address of the
+ // external function, if it was resolved.
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
+ JE.finishGVStub();
+
+ if (Actual != (void*)(intptr_t)LazyResolverFn) {
+ // If we are getting the stub for an external function, we really want the
+ // address of the stub in the GlobalAddressMap for the JIT, not the address
+ // of the external function.
+ TheJIT->updateGlobalMapping(F, Stub);
+ }
+
+ DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
+ << F->getName() << "'\n");
+
+ if (TheJIT->isCompilingLazily()) {
+ // Register this JITResolver as the one corresponding to this call site so
+ // JITCompilerFn will be able to find it.
+ StubToResolverMap->RegisterStubResolver(Stub, this);
+
+ // Finally, keep track of the stub-to-Function mapping so that the
+ // JITCompilerFn knows which function to compile!
+ state.AddCallSite(locked, Stub, F);
+ } else if (!Actual) {
+ // If we are JIT'ing non-lazily but need to call a function that does not
+ // exist yet, add it to the JIT's work list so that we can fill in the
+ // stub address later.
+ assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+ "'Actual' should have been set above.");
+ TheJIT->addPendingFunction(F);
+ }
+
+ return Stub;
+}
+
+/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
+/// GV address.
+void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this global variable, recycle it.
+ void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+ if (IndirectSym) return IndirectSym;
+
+ // Otherwise, codegen a new indirect symbol.
+ IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
+ JE);
+
+ DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ << "] for GV '" << GV->getName() << "'\n");
+
+ return IndirectSym;
+}
+
+/// getExternalFunctionStub - Return a stub for the function at the
+/// specified address, created lazily on demand.
+void *JITResolver::getExternalFunctionStub(void *FnAddr) {
+ // If we already have a stub for this function, recycle it.
+ void *&Stub = ExternalFnToStubMap[FnAddr];
+ if (Stub) return Stub;
+
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(0, SL.Size, SL.Alignment);
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+ JE.finishGVStub();
+
+ DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
+ << "] for external function at '" << FnAddr << "'\n");
+ return Stub;
+}
+
+unsigned JITResolver::getGOTIndexForAddr(void* addr) {
+ unsigned idx = revGOTMap[addr];
+ if (!idx) {
+ idx = ++nextGOTIndex;
+ revGOTMap[addr] = idx;
+ DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
+ << addr << "]\n");
+ }
+ return idx;
+}
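+// Worked example (hypothetical addresses): the first address asked about gets
+// GOT slot 1, the second gets slot 2, and asking again for either address
+// returns the same slot, because revGOTMap remembers the assignment. Slot 0 is
+// never handed out, which is what lets the "!idx" test above stand in for
+// "not seen before".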
+
+/// JITCompilerFn - This function is called when a lazy compilation stub has
+/// been entered. It looks up which function this stub corresponds to, compiles
+/// it if necessary, then returns the resultant function pointer.
+void *JITResolver::JITCompilerFn(void *Stub) {
+ JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
+ assert(JR && "Unable to find the corresponding JITResolver to the call site");
+
+ Function* F = 0;
+ void* ActualPtr = 0;
+
+ {
+ // Only lock for getting the Function. The call to getPointerToFunction
+ // made later in this function might trigger function materialization,
+ // which requires the JIT lock to be unlocked.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // The address given to us for the stub may not be exactly right, it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ std::pair<void*, Function*> I =
+ JR->state.LookupFunctionFromCallSite(locked, Stub);
+ F = I.second;
+ ActualPtr = I.first;
+ }
+
+ // If we have already code-generated the function, just return the address.
+ void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
+
+ if (!Result) {
+ // Otherwise we don't have it, do lazy compilation now.
+
+ // If lazy compilation is disabled, emit a useful error message and abort.
+ if (!JR->TheJIT->isCompilingLazily()) {
+ report_fatal_error("LLVM JIT requested to do lazy compilation of"
+ " function '"
+ + F->getName() + "' when lazy compiles are disabled!");
+ }
+
+ DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
+ << "' In stub ptr = " << Stub << " actual ptr = "
+ << ActualPtr << "\n");
+
+ Result = JR->TheJIT->getPointerToFunction(F);
+ }
+
+ // Reacquire the lock to update the GOT map.
+ MutexGuard locked(JR->TheJIT->lock);
+
+ // We might like to remove the call site from the CallSiteToFunction map, but
+ // we can't do that! Multiple threads could be stuck, waiting to acquire the
+ // lock above. As soon as the first thread finishes compiling the function,
+ // the next one will be released, and needs to be able to find the function
+ // it needs to call.
+
+ // FIXME: We could rewrite all references to this stub if we knew them.
+
+ // What we will do is set the compiled function address to map to the
+ // same GOT entry as the stub so that later clients may update the GOT
+ // if they see it still using the stub address.
+ // Note: this is done so the Resolver doesn't have to manage GOT memory.
+ // Do this without allocating map space if the target isn't using a GOT.
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];
+
+ return Result;
+}
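+// Rough sketch of the lazy-call path summarized by the code above (the exact
+// stub mechanics are target-specific): the emitted stub for a function foo()
+// jumps into the target's lazy resolver, which ends up in JITCompilerFn with
+// the address inside foo's stub; the stub is mapped back to foo's Function,
+// foo is compiled via getPointerToFunction, and the resulting pointer is what
+// the original caller finally branches to. Later calls that re-enter
+// JITCompilerFn through the same stub skip compilation, since
+// getPointerToGlobalIfAvailable already knows foo's address.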
+
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
+//
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+ bool MayNeedFarStub) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return TheJIT->getOrEmitGlobalVariable(GV);
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+ // If we have already compiled the function, return a pointer to its body.
+ Function *F = cast<Function>(V);
+
+ void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
+ if (FnStub) {
+ // Return the function stub if it's already created. We do this first so
+ // that we're returning the same address for the function as any previous
+ // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
+ // close enough to call.
+ return FnStub;
+ }
+
+ // If we know the target can handle arbitrary-distance calls, try to
+ // return a direct pointer.
+ if (!MayNeedFarStub) {
+ // If we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+ if (ResultPtr) return ResultPtr;
+
+ // If this is an external function pointer, we can force the JIT to
+ // 'compile' it, which really just adds it to the map.
+ if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage())
+ return TheJIT->getPointerToFunction(F);
+ }
+
+ // Otherwise, we may need to emit a stub, and, conservatively, we always do
+ // so. Note that it's possible to return null from getLazyFunctionStub in the
+ // case of a weak extern that fails to resolve.
+ return Resolver.getLazyFunctionStub(F);
+}
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
+ // Make sure GV is emitted first, and create a stub containing the fully
+ // resolved address.
+ void *GVAddress = getPointerToGlobal(V, Reference, false);
+ void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+ return StubAddr;
+}
+
+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+ if (DL.isUnknown()) return;
+ if (!BeforePrintingInsn) return;
+
+ const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
+
+ if (DL.getScope(Context) != 0 && PrevDL != DL) {
+ JITEvent_EmittedFunctionDetails::LineStart NextLine;
+ NextLine.Address = getCurrentPCValue();
+ NextLine.Loc = DL;
+ EmissionDetails.LineStarts.push_back(NextLine);
+ }
+
+ PrevDL = DL;
+}
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+ const TargetData *TD) {
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return 0;
+
+ unsigned Size = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Size = (Size + AlignMask) & ~AlignMask;
+ const Type *Ty = CPE.getType();
+ Size += TD->getTypeAllocSize(Ty);
+ }
+ return Size;
+}
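+// Worked example of the align-then-add loop above (made-up entries): with one
+// 4-byte entry aligned to 4 and one 8-byte entry aligned to 8, Size goes
+// 0 -> 4 after the first entry, is rounded up to 8 by (4 + 7) & ~7, and ends
+// at 16 after the second entry's 8 bytes are added.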
+
+void JITEmitter::startFunction(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
+ << F.getFunction()->getName() << "\n");
+
+ uintptr_t ActualSize = 0;
+ // Set the memory writable, if it's not already
+ MemMgr->setMemoryWritable();
+
+ if (SizeEstimate > 0) {
+ // SizeEstimate will be non-zero on reallocation attempts.
+ ActualSize = SizeEstimate;
+ }
+
+ BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin;
+
+ // Ensure the constant pool/jump table info is at least 16-byte aligned.
+ emitAlignment(16);
+
+ emitConstantPool(F.getConstantPool());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ initJumpTableInfo(MJTI);
+
+ // About to start emitting the machine code for the function.
+ emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
+ TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
+ EmittedFunctions[F.getFunction()].Code = CurBufferPtr;
+
+ MBBLocations.clear();
+
+ EmissionDetails.MF = &F;
+ EmissionDetails.LineStarts.clear();
+}
+
+bool JITEmitter::finishFunction(MachineFunction &F) {
+ if (CurBufferPtr == BufferEnd) {
+ // We must call endFunctionBody before retrying, because
+ // deallocateMemForFunction requires it.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+ retryWithMoreMemory(F);
+ return true;
+ }
+
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ emitJumpTableInfo(MJTI);
+
+ // FnStart is the start of the text, not the start of the constant pool and
+ // other per-function data.
+ uint8_t *FnStart =
+ (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+
+ // FnEnd is the end of the function's machine code.
+ uint8_t *FnEnd = CurBufferPtr;
+
+ if (!Relocations.empty()) {
+ CurFn = F.getFunction();
+ NumRelos += Relocations.size();
+
+ // Resolve the relocations to concrete pointers.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ void *ResultPtr = 0;
+ if (!MR.letTargetResolve()) {
+ if (MR.isExternalSymbol()) {
+ ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
+ false);
+ DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ << ResultPtr << "]\n");
+
+ // If the target REALLY wants a stub for this function, emit it now.
+ if (MR.mayNeedFarStub()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
+ }
+ } else if (MR.isGlobalValue()) {
+ ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
+ BufferBegin+MR.getMachineCodeOffset(),
+ MR.mayNeedFarStub());
+ } else if (MR.isIndirectSymbol()) {
+ ResultPtr = getPointerToGVIndirectSym(
+ MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
+ } else if (MR.isBasicBlock()) {
+ ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
+ } else if (MR.isConstantPoolIndex()) {
+ ResultPtr =
+ (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ } else {
+ assert(MR.isJumpTableIndex());
+ ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
+ }
+
+ MR.setResultPointer(ResultPtr);
+ }
+
+ // If we are managing the GOT and the relocation wants an index, give
+ // it one.
+ if (MR.isGOTRelative() && MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
+ MR.setGOTIndex(idx);
+ if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
+ }
+ }
+ }
+
+ CurFn = 0;
+ TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
+ Relocations.size(), MemMgr->getGOTBase());
+ }
+
+ // Update the GOT entry for F to point to the new code.
+ if (MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
+ if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n");
+ ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
+ }
+ }
+
+ // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for
+ // global variables that were referenced in the relocations.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+
+ if (CurBufferPtr == BufferEnd) {
+ retryWithMoreMemory(F);
+ return true;
+ } else {
+ // Now that we've succeeded in emitting the function, reset the
+ // SizeEstimate back down to zero.
+ SizeEstimate = 0;
+ }
+
+ BufferBegin = CurBufferPtr = 0;
+ NumBytes += FnEnd-FnStart;
+
+ // Invalidate the icache if necessary.
+ sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
+
+ TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
+ EmissionDetails);
+
+ // Reset the previous debug location.
+ PrevDL = DebugLoc();
+
+ DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
+ << "] Function: " << F.getFunction()->getName()
+ << ": " << (FnEnd-FnStart) << " bytes of text, "
+ << Relocations.size() << " relocations\n");
+
+ Relocations.clear();
+ ConstPoolAddresses.clear();
+
+ // Mark code region readable and executable if it's not so already.
+ MemMgr->setMemoryExecutable();
+
+ DEBUG({
+ if (sys::hasDisassembler()) {
+ dbgs() << "JIT: Disassembled code:\n";
+ dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ (uintptr_t)FnStart);
+ } else {
+ dbgs() << "JIT: Binary code:\n";
+ uint8_t* q = FnStart;
+ for (int i = 0; q < FnEnd; q += 4, ++i) {
+ if (i == 4)
+ i = 0;
+ if (i == 0)
+ dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
+ bool Done = false;
+ for (int j = 3; j >= 0; --j) {
+ if (q + j >= FnEnd)
+ Done = true;
+ else
+ dbgs() << (unsigned short)q[j];
+ }
+ if (Done)
+ break;
+ dbgs() << ' ';
+ if (i == 3)
+ dbgs() << '\n';
+ }
+ dbgs()<< '\n';
+ }
+ });
+
+ if (JITExceptionHandling || JITEmitDebugInfo) {
+ uintptr_t ActualSize = 0;
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin;
+ uint8_t *EhStart;
+ uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd,
+ EhStart);
+ MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
+ FrameRegister);
+ uint8_t *EhEnd = CurBufferPtr;
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+
+ if (JITExceptionHandling) {
+ TheJIT->RegisterTable(F.getFunction(), FrameRegister);
+ }
+
+ if (JITEmitDebugInfo) {
+ DebugInfo I;
+ I.FnStart = FnStart;
+ I.FnEnd = FnEnd;
+ I.EhStart = EhStart;
+ I.EhEnd = EhEnd;
+ DR->RegisterFunction(F.getFunction(), I);
+ }
+ }
+
+ if (MMI)
+ MMI->EndFunction();
+
+ return false;
+}
+
+void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
+ DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n");
+ Relocations.clear(); // Clear the old relocations or we'll reapply them.
+ ConstPoolAddresses.clear();
+ ++NumRetries;
+ deallocateMemForFunction(F.getFunction());
+ // Try again with at least twice as much free space.
+ SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+
+ for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
+ if (MBB->hasAddressTaken())
+ TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
+ }
+}
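+// Example (hypothetical first attempt): if the memory manager handed out a
+// 64 KB buffer and the function overflowed it, SizeEstimate becomes 128 KB,
+// and the next startFunction call asks for at least that much before the
+// function is re-emitted from scratch.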
+
+/// deallocateMemForFunction - Deallocate all memory for the specified
+/// function body. Also drop any references the function has to stubs.
+/// May be called while the Function is being destroyed inside ~Value().
+void JITEmitter::deallocateMemForFunction(const Function *F) {
+ ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
+ Emitted = EmittedFunctions.find(F);
+ if (Emitted != EmittedFunctions.end()) {
+ MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
+ MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable);
+ TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
+
+ EmittedFunctions.erase(Emitted);
+ }
+
+ if(JITExceptionHandling) {
+ TheJIT->DeregisterTable(F);
+ }
+
+ if (JITEmitDebugInfo) {
+ DR->UnregisterFunction(F);
+ }
+}
+
+
+void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+ if (BufferBegin)
+ return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+ // Create a new memory block if there is no active one. Care must be
+ // taken so that BufferBegin is invalidated when a block is trimmed.
+ BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+ BufferEnd = BufferBegin+Size;
+ return CurBufferPtr;
+}
+
+void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ // Delegate this call through the memory manager.
+ return MemMgr->allocateGlobal(Size, Alignment);
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ if (TheJIT->getJITInfo().hasCustomConstantPool())
+ return;
+
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return;
+
+ unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+ unsigned Align = MCP->getConstantPoolAlignment();
+ ConstantPoolBase = allocateSpace(Size, Align);
+ ConstantPool = MCP;
+
+ if (ConstantPoolBase == 0) return; // Buffer overflow.
+
+ DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ << "] (size: " << Size << ", alignment: " << Align << ")\n");
+
+ // Initialize the memory for all of the constant pool entries.
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Offset = (Offset + AlignMask) & ~AlignMask;
+
+ uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+ ConstPoolAddresses.push_back(CAddr);
+ if (CPE.isMachineConstantPoolEntry()) {
+ // FIXME: add support to lower machine constant pool values into bytes!
+ report_fatal_error("Initialize memory with machine specific constant pool"
+ "entry has not been implemented!");
+ }
+ TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+ DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
+ dbgs().write_hex(CAddr) << "]\n");
+
+ const Type *Ty = CPE.Val.ConstVal->getType();
+ Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
+ }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ unsigned NumEntries = 0;
+ for (unsigned i = 0, e = JT.size(); i != e; ++i)
+ NumEntries += JT[i].MBBs.size();
+
+ unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData());
+
+ // Just allocate space for all the jump tables now. We will fix up the actual
+ // MBB entries in the tables after we emit the code for each block, since then
+ // we will know the final locations of the MBBs in memory.
+ JumpTable = MJTI;
+ JumpTableBase = allocateSpace(NumEntries * EntrySize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));
+}
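+// Example of the sizing above (made-up tables): two jump tables with 3 and 5
+// targets and 4-byte entries reserve (3 + 5) * 4 = 32 bytes at JumpTableBase;
+// the slots themselves are only filled in later by emitJumpTableInfo, once
+// every MBB's final address is known.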
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty() || JumpTableBase == 0) return;
+
+
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ return;
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+ // .word LBB123
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
+ "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ break;
+ }
+
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4&&"Cross JIT'ing?");
+ // For each jump table, place the offset from the beginning of the table
+ // to the target address.
+ int *SlotPtr = (int*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the offset of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ uintptr_t Base = (uintptr_t)SlotPtr;
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ // FIXME: Use EntryKind instead of the magic "getPICJumpTableEntry" hook.
+ *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+ }
+ }
+ break;
+ }
+ }
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::finishGVStub() {
+ assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
+ NumBytes += getCurrentPCOffset();
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+}
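+// Usage sketch (mirroring getLazyFunctionStub above): startGVStub saves the
+// current BufferBegin/BufferEnd/CurBufferPtr and retargets them at a freshly
+// allocated stub, the target's emitFunctionStub writes into that window, and
+// finishGVStub restores the saved pointers so emission of the surrounding
+// function resumes exactly where it left off.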
+
+void *JITEmitter::allocIndirectGV(const GlobalValue *GV,
+ const uint8_t *Buffer, size_t Size,
+ unsigned Alignment) {
+ uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment);
+ memcpy(IndGV, Buffer, Size);
+ return IndGV;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+ assert(ConstantNum < ConstantPool->getConstants().size() &&
+ "Invalid ConstantPoolIndex!");
+ return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the jump table with index
+// 'Index' that was last initialized with 'initJumpTableInfo'.
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+ assert(Index < JT.size() && "Invalid jump table index!");
+
+ unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData());
+
+ unsigned Offset = 0;
+ for (unsigned i = 0; i < Index; ++i)
+ Offset += JT[i].MBBs.size();
+
+ Offset *= EntrySize;
+
+ return (uintptr_t)((char *)JumpTableBase + Offset);
+}
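+// Worked example (made-up table shapes): with two jump tables of 3 and 5
+// entries and a 4-byte entry size, getJumpTableEntryAddress(1) skips the 3
+// entries of table 0, giving an offset of 3 * 4 = 12 bytes past JumpTableBase.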
+
+void JITEmitter::EmittedFunctionConfig::onDelete(
+ JITEmitter *Emitter, const Function *F) {
+ Emitter->deallocateMemForFunction(F);
+}
+void JITEmitter::EmittedFunctionConfig::onRAUW(
+ JITEmitter *, const Function*, const Function*) {
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
+ TargetMachine &tm) {
+ return new JITEmitter(jit, JMM, tm);
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function. If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+ // If we have already code-generated the function, just return the address.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ // Get a stub if the target supports it.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ return JE->getJITResolver().getLazyFunctionStub(F);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+ // Get the empty stub we generated earlier.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr != Stub && "Function must have non-stub address to be updated.");
+
+ // Tell the target jit info to rewrite the stub at the specified address,
+ // rather than creating a new one.
+ TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
+ JE->startGVStub(Stub, layout.Size);
+ getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
+ JE->finishGVStub();
+}
+
+/// freeMachineCodeForFunction - Release machine code memory for the given Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+ // Delete translation for this from the ExecutionEngine, so it will get
+ // retranslated next time it is used.
+ updateGlobalMapping(F, 0);
+
+ // Free the actual memory for the function body and related stuff.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 000000000000..eec23cec0af9
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,727 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <vector>
+#include <cassert>
+#include <climits>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MemoryRangeHeader - For a range of memory, this is the header that we put
+ /// on the block of memory. It is carefully crafted to be one word of memory.
+ /// Allocated blocks have just this header, free'd blocks have FreeRangeHeader
+ /// which starts with this.
+ struct FreeRangeHeader;
+ struct MemoryRangeHeader {
+ /// ThisAllocated - This is true if this block is currently allocated. If
+ /// not, this can be converted to a FreeRangeHeader.
+ unsigned ThisAllocated : 1;
+
+ /// PrevAllocated - Keep track of whether the block immediately before us is
+ /// allocated. If not, the word immediately before this header is the size
+ /// of the previous block.
+ unsigned PrevAllocated : 1;
+
+ /// BlockSize - This is the size in bytes of this memory block,
+ /// including this header.
+ uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+ /// getBlockAfter - Return the memory block immediately after this one.
+ ///
+ MemoryRangeHeader &getBlockAfter() const {
+ return *(MemoryRangeHeader*)((char*)this+BlockSize);
+ }
+
+ /// getFreeBlockBefore - If the block before this one is free, return it,
+ /// otherwise return null.
+ FreeRangeHeader *getFreeBlockBefore() const {
+ if (PrevAllocated) return 0;
+ intptr_t PrevSize = ((intptr_t *)this)[-1];
+ return (FreeRangeHeader*)((char*)this-PrevSize);
+ }
+
+ /// FreeBlock - Turn an allocated block into a free block, adjusting
+ /// bits in the object headers, and adding an end of region memory block.
+ FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
+
+ /// TrimAllocationToSize - If this allocated block is significantly larger
+ /// than NewSize, split it into two pieces (where the former is NewSize
+ /// bytes, including the header), and add the new block to the free list.
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ uint64_t NewSize);
+ };
+
+ /// FreeRangeHeader - For a memory block that isn't already allocated, this
+ /// keeps track of the current block and has a pointer to the next free block.
+ /// Free blocks are kept on a circularly linked list.
+ struct FreeRangeHeader : public MemoryRangeHeader {
+ FreeRangeHeader *Prev;
+ FreeRangeHeader *Next;
+
+ /// getMinBlockSize - Get the minimum size for a memory block. Blocks
+ /// smaller than this size cannot be created.
+ static unsigned getMinBlockSize() {
+ return sizeof(FreeRangeHeader)+sizeof(intptr_t);
+ }
+
+ /// SetEndOfBlockSizeMarker - The word at the end of every free block is
+ /// known to be the size of the free block. Set it for this block.
+ void SetEndOfBlockSizeMarker() {
+ void *EndOfBlock = (char*)this + BlockSize;
+ ((intptr_t *)EndOfBlock)[-1] = BlockSize;
+ }
+
+ FreeRangeHeader *RemoveFromFreeList() {
+ assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
+ Next->Prev = Prev;
+ return Prev->Next = Next;
+ }
+
+ void AddToFreeList(FreeRangeHeader *FreeList) {
+ Next = FreeList;
+ Prev = FreeList->Prev;
+ Prev->Next = this;
+ Next->Prev = this;
+ }
+
+ /// GrowBlock - The block after this block just got deallocated. Merge it
+ /// into the current block.
+ void GrowBlock(uintptr_t NewSize);
+
+ /// AllocateBlock - Mark this entire block allocated, updating freelists
+ /// etc. This returns a pointer to the circular free-list.
+ FreeRangeHeader *AllocateBlock();
+ };
+}
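+// Illustrative layout (hypothetical sizes): an allocated 64-byte block is one
+// header word followed by payload, with BlockSize counting both. Once freed,
+// the block becomes a FreeRangeHeader whose last word repeats the 64-byte
+// size, so the next block's getFreeBlockBefore() can walk backwards to it
+// without any global table; PrevAllocated in each header records whether that
+// trailing size word is valid for the block in front of it.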
+
+
+/// AllocateBlock - Mark this entire block allocated, updating freelists
+/// etc. This returns a pointer to the circular free-list.
+FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
+ assert(!ThisAllocated && !getBlockAfter().PrevAllocated &&
+ "Cannot allocate an allocated block!");
+ // Mark this block allocated.
+ ThisAllocated = 1;
+ getBlockAfter().PrevAllocated = 1;
+
+ // Remove it from the free list.
+ return RemoveFromFreeList();
+}
+
+/// FreeBlock - Turn an allocated block into a free block, adjusting
+/// bits in the object headers, and adding an end of region memory block.
+/// If possible, coalesce this block with neighboring blocks. Return the
+/// FreeRangeHeader to allocate from.
+FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
+ MemoryRangeHeader *FollowingBlock = &getBlockAfter();
+ assert(ThisAllocated && "This block is already free!");
+ assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
+
+ FreeRangeHeader *FreeListToReturn = FreeList;
+
+ // If the block after this one is free, merge it into this block.
+ if (!FollowingBlock->ThisAllocated) {
+ FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
+ // "FreeList" always needs to be a valid free block. If we're about to
+ // coalesce with it, update our notion of what the free list is.
+ if (&FollowingFreeBlock == FreeList) {
+ FreeList = FollowingFreeBlock.Next;
+ FreeListToReturn = 0;
+ assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
+ }
+ FollowingFreeBlock.RemoveFromFreeList();
+
+ // Include the following block into this one.
+ BlockSize += FollowingFreeBlock.BlockSize;
+ FollowingBlock = &FollowingFreeBlock.getBlockAfter();
+
+ // Tell the block after the block we are coalescing that this block is
+ // allocated.
+ FollowingBlock->PrevAllocated = 1;
+ }
+
+ assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
+
+ if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
+ PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
+ return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
+ }
+
+ // Otherwise, mark this block free.
+ FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this;
+ FollowingBlock->PrevAllocated = 0;
+ FreeBlock.ThisAllocated = 0;
+
+ // Link this into the linked list of free blocks.
+ FreeBlock.AddToFreeList(FreeList);
+
+ // Add a marker at the end of the block, indicating the size of this free
+ // block.
+ FreeBlock.SetEndOfBlockSizeMarker();
+ return FreeListToReturn ? FreeListToReturn : &FreeBlock;
+}
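+// Coalescing sketch (hypothetical neighbors): freeing a block whose successor
+// is already free first folds the successor into it; if the predecessor is
+// free as well, the whole range is then absorbed into the predecessor via
+// GrowBlock, so the free list never carries two adjacent free blocks.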
+
+/// GrowBlock - The block after this block just got deallocated. Merge it
+/// into the current block.
+void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
+ assert(NewSize > BlockSize && "Not growing block?");
+ BlockSize = NewSize;
+ SetEndOfBlockSizeMarker();
+ getBlockAfter().PrevAllocated = 0;
+}
+
+/// TrimAllocationToSize - If this allocated block is significantly larger
+/// than NewSize, split it into two pieces (where the former is NewSize
+/// bytes, including the header), and add the new block to the free list.
+FreeRangeHeader *MemoryRangeHeader::
+TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
+ assert(ThisAllocated && getBlockAfter().PrevAllocated &&
+ "Cannot deallocate part of an allocated block!");
+
+ // Don't allow blocks to be trimmed below minimum required size
+ NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
+
+ // Round up size for alignment of header.
+ unsigned HeaderAlign = __alignof(FreeRangeHeader);
+ NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
+
+ // Size is now the size of the block we will remove from the start of the
+ // current block.
+ assert(NewSize <= BlockSize &&
+ "Allocating more space from this block than exists!");
+
+ // If splitting this block will cause the remainder to be too small, do not
+ // split the block.
+ if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
+ return FreeList;
+
+ // Otherwise, we splice the required number of bytes out of this block, form
+ // a new block immediately after it, then mark this block allocated.
+ MemoryRangeHeader &FormerNextBlock = getBlockAfter();
+
+ // Change the size of this block.
+ BlockSize = NewSize;
+
+ // Get the new block we just sliced out and turn it into a free block.
+ FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+ NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+ NewNextBlock.ThisAllocated = 0;
+ NewNextBlock.PrevAllocated = 1;
+ NewNextBlock.SetEndOfBlockSizeMarker();
+ FormerNextBlock.PrevAllocated = 0;
+ NewNextBlock.AddToFreeList(FreeList);
+ return &NewNextBlock;
+}
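+// Worked example of the rounding above (made-up request, assuming an 8-byte
+// FreeRangeHeader alignment): a NewSize of 52 becomes (52 + 7) & ~7 = 56, and
+// the block is split only if its current size exceeds 56 plus the minimum
+// block size; otherwise the caller simply keeps the whole block.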
+
+//===----------------------------------------------------------------------===//
+// JIT Memory Manager Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class DefaultJITMemoryManager;
+
+ class JITSlabAllocator : public SlabAllocator {
+ DefaultJITMemoryManager &JMM;
+ public:
+ JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+ virtual ~JITSlabAllocator() { }
+ virtual MemSlab *Allocate(size_t Size);
+ virtual void Deallocate(MemSlab *Slab);
+ };
+
+ /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+ /// This splits a large block of MAP_NORESERVE'd memory into two
+ /// sections, one for function stubs, one for the functions themselves. We
+ /// have to do this because we may need to emit a function stub while in the
+ /// middle of emitting a function, and we don't know how large the function we
+ /// are emitting is.
+ class DefaultJITMemoryManager : public JITMemoryManager {
+
+ // Whether to poison freed memory.
+ bool PoisonMemory;
+
+ /// LastSlab - This points to the last slab allocated and is used as the
+ /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all
+ /// stubs, data, and code contiguously in memory. In general, however, this
+ /// is not possible because the NearBlock parameter is ignored on Windows
+ /// platforms and even on Unix it works on a best-effort basis.
+ sys::MemoryBlock LastSlab;
+
+ // Memory slabs allocated by the JIT. We refer to them as slabs so we don't
+ // confuse them with the blocks of memory described above.
+ std::vector<sys::MemoryBlock> CodeSlabs;
+ JITSlabAllocator BumpSlabAllocator;
+ BumpPtrAllocator StubAllocator;
+ BumpPtrAllocator DataAllocator;
+
+ // Circular list of free blocks.
+ FreeRangeHeader *FreeMemoryList;
+
+ // When emitting code into a memory block, this is the block.
+ MemoryRangeHeader *CurBlock;
+
+ uint8_t *GOTBase; // Target Specific reserved memory
+ public:
+ DefaultJITMemoryManager();
+ ~DefaultJITMemoryManager();
+
+ /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
+ /// last slab it allocated, so that subsequent allocations follow it.
+ sys::MemoryBlock allocateNewSlab(size_t size);
+
+ /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at
+ /// least this much unless more is requested.
+ static const size_t DefaultCodeSlabSize;
+
+ /// DefaultSlabSize - Allocate data into slabs of this size unless we get
+ /// an allocation above SizeThreshold.
+ static const size_t DefaultSlabSize;
+
+ /// DefaultSizeThreshold - For any allocation larger than this threshold, we
+ /// should allocate a separate slab.
+ static const size_t DefaultSizeThreshold;
+
+ void AllocateGOT();
+
+ // Testing methods.
+ virtual bool CheckInvariants(std::string &ErrorStr);
+ size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; }
+ size_t GetDefaultDataSlabSize() { return DefaultSlabSize; }
+ size_t GetDefaultStubSlabSize() { return DefaultSlabSize; }
+ unsigned GetNumCodeSlabs() { return CodeSlabs.size(); }
+ unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); }
+ unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); }
+
+ /// startFunctionBody - When a function starts, allocate a block of free
+ /// executable memory, returning a pointer to it and its actual size.
+ uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < ActualSize ||
+ largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader);
+ return (uint8_t *)(CurBlock + 1);
+ }
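+ // Sketch of the search above (hypothetical free list): with free blocks of
+ // 4 KB, 1 MB and 64 KB on the circular list, the scan settles on the 1 MB
+ // block, hands the caller everything past its MemoryRangeHeader, and falls
+ // back to allocateNewCodeSlab only when even the largest block cannot cover
+ // the requested ActualSize.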
+
+ /// allocateNewCodeSlab - Helper method to allocate a new slab of code
+ /// memory from the OS and add it to the free list. Returns the new
+ /// FreeRangeHeader at the base of the slab.
+ FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) {
+ // If the user needs at least MinSize free memory, then we account for
+ // two MemoryRangeHeaders: the one in the user's block, and the one at the
+ // end of the slab.
+ size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader);
+ size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin);
+ sys::MemoryBlock B = allocateNewSlab(SlabSize);
+ CodeSlabs.push_back(B);
+ char *MemBase = (char*)(B.base());
+
+ // Put a tiny allocated block at the end of the memory chunk, so when
+ // FreeBlock calls getBlockAfter it doesn't fall off the end.
+ MemoryRangeHeader *EndBlock =
+ (MemoryRangeHeader*)(MemBase + B.size()) - 1;
+ EndBlock->ThisAllocated = 1;
+ EndBlock->PrevAllocated = 0;
+ EndBlock->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Start out with a vast new block of free memory.
+ FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase;
+ NewBlock->ThisAllocated = 0;
+ // Make sure getFreeBlockBefore doesn't look into unmapped memory.
+ NewBlock->PrevAllocated = 1;
+ NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock;
+ NewBlock->SetEndOfBlockSizeMarker();
+ NewBlock->AddToFreeList(FreeMemoryList);
+
+ assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize &&
+ "The block was too small!");
+ return NewBlock;
+ }
+
+ /// endFunctionBody - The function F is now allocated, and takes the memory
+ /// in the range [FunctionStart,FunctionEnd).
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ assert(FunctionEnd > FunctionStart);
+ assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched function start/end!");
+
+ uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ CurBlock = FreeMemoryList;
+ FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+ uint8_t *result = (uint8_t *)(CurBlock + 1);
+
+ if (Alignment == 0) Alignment = 1;
+ result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+ ~(intptr_t)(Alignment-1));
+
+ uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+
+ return result;
+ }
+
+ /// allocateStub - Allocate memory for a function stub.
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) {
+ return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
+ }
+
+ /// allocateGlobal - Allocate memory for a global.
+ uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// function's exception table.
+ uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+ return startFunctionBody(F, ActualSize);
+ }
+
+ /// endExceptionTable - The exception table of F is now allocated,
+ /// and takes the memory in the range [TableStart,TableEnd).
+ void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ assert(TableEnd > TableStart);
+ assert(TableStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched table start/end!");
+
+ uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+ }
+
+ uint8_t *getGOTBase() const {
+ return GOTBase;
+ }
+
+ void deallocateBlock(void *Block) {
+ // Find the block that is allocated for this function.
+ MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
+ assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+ // Fill the buffer with garbage!
+ if (PoisonMemory) {
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+ }
+
+ // Free the memory.
+ FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+ }
+
+ /// deallocateFunctionBody - Deallocate all memory for the specified
+ /// function body.
+ void deallocateFunctionBody(void *Body) {
+ if (Body) deallocateBlock(Body);
+ }
+
+ /// deallocateExceptionTable - Deallocate memory for the specified
+ /// exception table.
+ void deallocateExceptionTable(void *ET) {
+ if (ET) deallocateBlock(ET);
+ }
+
+ /// setMemoryWritable - When code generation is in progress,
+ /// the code pages may need permissions changed.
+ void setMemoryWritable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setWritable(CodeSlabs[i]);
+ }
+ /// setMemoryExecutable - When code generation is done and we're ready to
+ /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable()
+ {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::setExecutable(CodeSlabs[i]);
+ }
+
+ /// setPoisonMemory - Controls whether we write garbage over freed memory.
+ ///
+ void setPoisonMemory(bool poison) {
+ PoisonMemory = poison;
+ }
+ };
+}
+
+MemSlab *JITSlabAllocator::Allocate(size_t Size) {
+ sys::MemoryBlock B = JMM.allocateNewSlab(Size);
+ MemSlab *Slab = (MemSlab*)B.base();
+ Slab->Size = B.size();
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void JITSlabAllocator::Deallocate(MemSlab *Slab) {
+ sys::MemoryBlock B(Slab, Slab->Size);
+ sys::Memory::ReleaseRWX(B);
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager()
+ :
+#ifdef NDEBUG
+ PoisonMemory(false),
+#else
+ PoisonMemory(true),
+#endif
+ LastSlab(0, 0),
+ BumpSlabAllocator(*this),
+ StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator),
+ DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) {
+
+ // Allocate space for code.
+ sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
+ CodeSlabs.push_back(MemBlock);
+ uint8_t *MemBase = (uint8_t*)MemBlock.base();
+
+ // We set up the memory chunk with 4 mem regions, like this:
+ // [ START
+ // [ Free #0 ] -> Large space to allocate functions from.
+ // [ Allocated #1 ] -> Tiny space to separate regions.
+ // [ Free #2 ] -> Tiny space so there is always at least 1 free block.
+ // [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
+ // END ]
+ //
+ // The last three blocks are never deallocated or touched.
+
+ // Add MemoryRangeHeader to the end of the memory region, indicating that
+ // the space after the block of memory is allocated. This is block #3.
+ MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
+ Mem3->ThisAllocated = 1;
+ Mem3->PrevAllocated = 0;
+ Mem3->BlockSize = sizeof(MemoryRangeHeader);
+
+  // Add a tiny free region so that the free list always has one entry.
+ FreeRangeHeader *Mem2 =
+ (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
+ Mem2->ThisAllocated = 0;
+ Mem2->PrevAllocated = 1;
+ Mem2->BlockSize = FreeRangeHeader::getMinBlockSize();
+ Mem2->SetEndOfBlockSizeMarker();
+ Mem2->Prev = Mem2; // Mem2 *is* the free list for now.
+ Mem2->Next = Mem2;
+
+  // Add a tiny allocated region so that Mem2 is never coalesced away.
+ MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
+ Mem1->ThisAllocated = 1;
+ Mem1->PrevAllocated = 0;
+ Mem1->BlockSize = sizeof(MemoryRangeHeader);
+
+ // Add a FreeRangeHeader to the start of the function body region, indicating
+ // that the space is free. Mark the previous block allocated so we never look
+ // at it.
+ FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase;
+ Mem0->ThisAllocated = 0;
+ Mem0->PrevAllocated = 1;
+ Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
+ Mem0->SetEndOfBlockSizeMarker();
+ Mem0->AddToFreeList(Mem2);
+
+ // Start out with the freelist pointing to Mem0.
+ FreeMemoryList = Mem0;
+
+ GOTBase = NULL;
+}
+
+void DefaultJITMemoryManager::AllocateGOT() {
+  assert(GOTBase == 0 && "Cannot allocate the GOT multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+DefaultJITMemoryManager::~DefaultJITMemoryManager() {
+ for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+ sys::Memory::ReleaseRWX(CodeSlabs[i]);
+
+ delete[] GOTBase;
+}
+
+sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
+ // Allocate a new block close to the last one.
+ std::string ErrMsg;
+ sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+ sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
+ if (B.base() == 0) {
+ report_fatal_error("Allocation failed when allocating new memory in the"
+ " JIT\n" + Twine(ErrMsg));
+ }
+ LastSlab = B;
+ ++NumSlabs;
+ // Initialize the slab to garbage when debugging.
+ if (PoisonMemory) {
+ memset(B.base(), 0xCD, B.size());
+ }
+ return B;
+}
+
+/// CheckInvariants - For testing only. Return "" if all internal invariants
+/// are preserved, and a helpful error message otherwise. For free and
+/// allocated blocks, make sure that adding BlockSize gives a valid block.
+/// For free blocks, make sure they're in the free list and that their end of
+/// block size marker is correct. This function should return an error before
+/// accessing bad memory. This function is defined here instead of in
+/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
+/// implementation details of DefaultJITMemoryManager.
+bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
+ raw_string_ostream Err(ErrorStr);
+
+  // Construct the set of FreeRangeHeader pointers so we can query it
+  // efficiently.
+ llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
+ FreeRangeHeader* FreeHead = FreeMemoryList;
+ FreeRangeHeader* FreeRange = FreeHead;
+
+ do {
+ // Check that the free range pointer is in the blocks we've allocated.
+ bool Found = false;
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E && !Found; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+ Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
+ }
+ if (!Found) {
+ Err << "Corrupt free list; points to " << FreeRange;
+ return false;
+ }
+
+ if (FreeRange->Next->Prev != FreeRange) {
+ Err << "Next and Prev pointers do not match.";
+ return false;
+ }
+
+ // Otherwise, add it to the set.
+ FreeHdrSet.insert(FreeRange);
+ FreeRange = FreeRange->Next;
+ } while (FreeRange != FreeHead);
+
+ // Go over each block, and look at each MemoryRangeHeader.
+ for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+ E = CodeSlabs.end(); I != E; ++I) {
+ char *Start = (char*)I->base();
+ char *End = Start + I->size();
+
+ // Check each memory range.
+ for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+ Start <= (char*)Hdr && (char*)Hdr < End;
+ Hdr = &Hdr->getBlockAfter()) {
+ if (Hdr->ThisAllocated == 0) {
+ // Check that this range is in the free list.
+ if (!FreeHdrSet.count(Hdr)) {
+ Err << "Found free header at " << Hdr << " that is not in free list.";
+ return false;
+ }
+
+ // Now make sure the size marker at the end of the block is correct.
+ uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
+ if (!(Start <= (char*)Marker && (char*)Marker < End)) {
+ Err << "Block size in header points out of current MemoryBlock.";
+ return false;
+ }
+ if (Hdr->BlockSize != *Marker) {
+ Err << "End of block size marker (" << *Marker << ") "
+ << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
+ return false;
+ }
+ }
+
+ if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
+ Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
+ << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
+ return false;
+ } else if (!LastHdr && !Hdr->PrevAllocated) {
+ Err << "The first header should have PrevAllocated true.";
+ return false;
+ }
+
+ // Remember the last header.
+ LastHdr = Hdr;
+ }
+ }
+
+ // All invariants are preserved.
+ return true;
+}
+
+JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
+ return new DefaultJITMemoryManager();
+}
+
+// Allocate memory for code in 512K slabs.
+const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024;
+
+// Allocate globals and stubs in slabs of 64K. (probably 16 pages)
+const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024;
+
+// Waste at most 16K at the end of each bump slab. (probably 4 pages)
+const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024;
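
A minimal, self-contained sketch of the four-region slab layout that the DefaultJITMemoryManager constructor above sets up. The Header struct here is a stand-in for MemoryRangeHeader/FreeRangeHeader (no free-list links or end-of-block size markers), and a plain malloc'd buffer stands in for the RWX code slab, so this only illustrates the boundary-tag bookkeeping, not the real allocator.

#include <cstdio>
#include <cstdlib>
#include <stdint.h>

// Simplified stand-in for MemoryRangeHeader/FreeRangeHeader: just the
// boundary-tag bits and the block size, no free-list links or end markers.
struct Header {
  uintptr_t ThisAllocated : 1;
  uintptr_t PrevAllocated : 1;
  uintptr_t BlockSize     : sizeof(uintptr_t)*8 - 2;
};

int main() {
  const size_t SlabSize = 4096;
  char *Base = (char*)std::malloc(SlabSize);
  if (!Base) return 1;

  // Block #3: allocated header at the very end, so a scan never walks
  // past the slab.
  Header *Mem3 = (Header*)(Base + SlabSize) - 1;
  Mem3->ThisAllocated = 1;
  Mem3->PrevAllocated = 0;
  Mem3->BlockSize = sizeof(Header);

  // Block #2: a tiny free block, so the free list is never empty.
  Header *Mem2 = (Header*)((char*)Mem3 - 2*sizeof(Header));
  Mem2->ThisAllocated = 0;
  Mem2->PrevAllocated = 1;
  Mem2->BlockSize = 2*sizeof(Header);

  // Block #1: a tiny allocated block, so #2 can never be coalesced away.
  Header *Mem1 = (Header*)Mem2 - 1;
  Mem1->ThisAllocated = 1;
  Mem1->PrevAllocated = 0;
  Mem1->BlockSize = sizeof(Header);

  // Block #0: the rest of the slab, free, where function bodies would go.
  Header *Mem0 = (Header*)Base;
  Mem0->ThisAllocated = 0;
  Mem0->PrevAllocated = 1;   // Pretend, so we never look before the slab.
  Mem0->BlockSize = (char*)Mem1 - (char*)Mem0;

  // Walk the slab front to back via the size fields, as CheckInvariants does.
  for (Header *H = Mem0; (char*)H < Base + SlabSize;
       H = (Header*)((char*)H + H->BlockSize))
    std::printf("block at +%4ld: size %4lu, %s\n", (long)((char*)H - Base),
                (unsigned long)H->BlockSize,
                H->ThisAllocated ? "allocated" : "free");

  std::free(Base);
  return 0;
}
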
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
new file mode 100644
index 000000000000..9a9ed6d33484
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -0,0 +1,192 @@
+//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that calls into OProfile to tell
+// it about JITted functions. For now, we only record function names and sizes,
+// but eventually we'll also record line number information.
+//
+// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
+// definition of the interface we're using.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h"
+#include <stddef.h>
+using namespace llvm;
+
+#if USE_OPROFILE
+
+#include <opagent.h>
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+ op_agent_t Agent;
+public:
+ OProfileJITEventListener();
+ ~OProfileJITEventListener();
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+OProfileJITEventListener::OProfileJITEventListener()
+ : Agent(op_open_agent()) {
+ if (Agent == NULL) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Connected to OProfile agent.\n");
+ }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+ if (Agent != NULL) {
+ if (op_close_agent(Agent) == -1) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
+ << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
+ }
+ }
+}
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile. Use an AssertingVH rather than a ValueMap because we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ Filename = DIScope(Scope).getFilename();
+ }
+ return Filename.c_str();
+ }
+};
+
+static debug_line_info LineStartToOProfileFormat(
+ const MachineFunction &MF, FilenameCache &Filenames,
+ uintptr_t Address, DebugLoc Loc) {
+ debug_line_info Result;
+ Result.vma = Address;
+ Result.lineno = Loc.getLine();
+ Result.filename = Filenames.getFilename(
+ Loc.getScope(MF.getFunction()->getContext()));
+ DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ << Result.filename << ":" << Result.lineno << "\n");
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+ if (op_write_native_code(Agent, F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnStart, FnSize) == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ return;
+ }
+
+ if (!Details.LineStarts.empty()) {
+ // Now we convert the line number information from the address/DebugLoc
+ // format in Details to the address/filename/lineno format that OProfile
+ // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
+ // line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ // If we have debug info for the function itself, use that as the line
+ // number of the first several instructions. Otherwise, after filling
+ // LineInfo, we'll adjust the address of the first line number to point at
+ // the start of the function.
+ debug_line_info line_info;
+ line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+ line_info.lineno = FunctionDI.getLineNumber();
+ line_info.filename = Filenames.getFilename(FirstLocScope);
+ LineInfo.push_back(line_info);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+
+    // In case the function didn't have line info of its own, adjust the first
+    // line info's address to point at the start of the function.
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
+ if (op_write_debug_line_info(Agent, FnStart,
+ LineInfo.size(), &*LineInfo.begin()) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about line numbers for native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ }
+ }
+}
+
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about unload of native function at "
+ << FnStart << "\n");
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *createOProfileJITEventListener() {
+ return new OProfileJITEventListener;
+}
+}
+
+#else // USE_OPROFILE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they haven't configured with the OProfile libraries.
+JITEventListener *createOProfileJITEventListener() {
+ return NULL;
+}
+} // namespace llvm
+
+#endif // USE_OPROFILE
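
A hedged sketch of how a JIT client might attach this listener, assuming createOProfileJITEventListener() is declared in llvm/ExecutionEngine/JITEventListener.h and ExecutionEngine::RegisterJITEventListener() is available, as in LLVM of this vintage. The caller keeps ownership of the listener and must keep it alive while the engine uses it.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
using namespace llvm;

// Attach the OProfile listener to an existing engine. The factory returns
// NULL when LLVM was configured without OProfile support, so this degrades
// to a no-op in that case.
static JITEventListener *attachOProfileListener(ExecutionEngine &EE) {
  JITEventListener *Listener = createOProfileJITEventListener();
  if (Listener)
    EE.RegisterJITEventListener(Listener);
  return Listener;
}
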
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
new file mode 100644
index 000000000000..f7ed176fef78
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMCJIT
+ MCJIT.cpp
+ TargetSelect.cpp
+ )
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp
new file mode 100644
index 000000000000..e431c848d630
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call. This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially. For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* The stat functions are redirected to __xstat with a version number. On
+ * x86-64, linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make
+ * 'stat' available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+    // We should not invoke the parent's ctors/dtors from a generated main()!
+    // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+    // callee's (e.g. tools/lli) copy, invoking the wrong, duplicated ctors
+    // (and registering the wrong callee's dtors with atexit(3)). We expect
+    // ExecutionEngine::runStaticConstructorsDestructors() to be called before
+    // ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+    // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // and has an asm specifier, try again without the underscore.
+ if (Name[0] == 1 && NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+ }
+
+  // If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
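
A small stand-alone sketch of the atexit interception pattern above: handlers registered through the intercepted entry point are stashed in a vector and run in LIFO order before the real exit(3) would be called. The names my_atexit and my_exit_prologue are illustrative stand-ins for jit_atexit and runAtExitHandlers, not part of the JIT.

#include <cstdio>
#include <vector>

static std::vector<void (*)()> Handlers;

// What jit_atexit does: record the handler and report success like atexit(3).
static int my_atexit(void (*Fn)()) {
  Handlers.push_back(Fn);
  return 0;
}

// What runAtExitHandlers does: run handlers in reverse registration order,
// the same order exit(3) would use.
static void my_exit_prologue() {
  while (!Handlers.empty()) {
    void (*Fn)() = Handlers.back();
    Handlers.pop_back();
    Fn();
  }
}

static void first()  { std::puts("registered first, runs last"); }
static void second() { std::puts("registered second, runs first"); }

int main() {
  my_atexit(first);
  my_atexit(second);
  my_exit_prologue();  // jit_exit() does this before calling exit(Status).
  return 0;
}
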
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 000000000000..4475f4d5c0d8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,221 @@
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "MCJITMemoryManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo())
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
+ GVsWithCode);
+
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+}
+
+MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
+ RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode)
+ : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
+
+ PM.add(new TargetData(*TM->getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) {
+ report_fatal_error("Target does not support MC emission!");
+ }
+
+ // Initialize passes.
+ // FIXME: When we support multiple modules, we'll want to move the code
+ // gen and finalization out of the constructor here and do it more
+ // on-demand as part of getPointerToFunction().
+ PM.run(*M);
+ // Flush the output buffer so the SmallVector gets its data.
+ OS.flush();
+
+ // Load the object into the dynamic linker.
+ // FIXME: It would be nice to avoid making yet another copy.
+ MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
+ Buffer.size()));
+ if (Dyld.loadObject(MB))
+ report_fatal_error(Dyld.getErrorString());
+ // Resolve any relocations.
+ Dyld.resolveRelocations();
+}
+
+MCJIT::~MCJIT() {
+ delete MemMgr;
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+ report_fatal_error("not yet implemented");
+ return 0;
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+ if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ // FIXME: Should we be using the mangler for this? Probably.
+ StringRef BaseName = F->getName();
+ if (BaseName[0] == '\1')
+ return (void*)Dyld.getSymbolAddress(BaseName.substr(1));
+ return (void*)Dyld.getSymbolAddress((TM->getMCAsmInfo()->getGlobalPrefix()
+ + BaseName).str());
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+  // Next, handle the cases where no arguments are passed.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ return rv;
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ assert("Full-featured argument passing not supported yet!");
+ return GenericValue();
+}
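
An illustrative sketch of the common-`main' dispatch in MCJIT::runFunction above: the void* obtained for a function is cast through intptr_t to a typed function pointer and called directly. fake_main is a stand-in for JITted code; in the real path the pointer comes from getPointerToFunction().

#include <cstdio>
#include <stdint.h>

// A native function standing in for JITted code.
static int fake_main(int argc, char **argv) {
  std::printf("argc = %d, argv[0] = %s\n", argc, argv[0]);
  return argc;
}

int main() {
  void *FPtr = (void*)(intptr_t)&fake_main;

  // The int(int, char**) "common main prototype" case from runFunction:
  // cast through intptr_t to silence pedantic function-pointer warnings,
  // then call directly.
  int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;

  char prog[] = "demo";
  char *argv[] = { prog, 0 };
  int rv = PF(1, argv);
  std::printf("returned %d\n", rv);
  return 0;
}
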
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 000000000000..b64c21a97360
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,91 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/PassManager.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+// FIXME: This makes all kinds of horrible assumptions for the time being,
+// like only having one module, not needing to worry about multi-threading,
+// blah blah. Purely in get-it-up-and-limping mode for now.
+
+class MCJIT : public ExecutionEngine {
+ MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
+ RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
+
+ TargetMachine *TM;
+ MCContext *Ctx;
+ RTDyldMemoryManager *MemMgr;
+
+  // FIXME: These may need to be moved to a separate 'jitstate' member, as the
+  // non-MC JIT does for multithreading and such. Just keep them here for now.
+ PassManager PM;
+ Module *M;
+ // FIXME: This really doesn't belong here.
+ SmallVector<char, 4096> Buffer; // Working buffer into which we JIT.
+ raw_svector_ostream OS;
+
+ RuntimeDyld Dyld;
+
+public:
+ ~MCJIT();
+
+ /// @name ExecutionEngine interface implementation
+ /// @{
+
+ virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+ virtual void *getPointerToFunction(Function *F);
+
+ virtual void *recompileAndRelinkFunction(Function *F);
+
+ virtual void freeMachineCodeForFunction(Function *F);
+
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+ /// @}
+ /// @name (Private) Registration Interfaces
+ /// @{
+
+ static void Register() {
+ MCJITCtor = createJIT;
+ }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
new file mode 100644
index 000000000000..40bc031a0771
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -0,0 +1,69 @@
+//===- MCJITMemoryManager.h - Definition for the Memory Manager -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <assert.h>
+
+namespace llvm {
+
+// The MCJIT memory manager is a layer between the standard JITMemoryManager
+// and the RuntimeDyld interface that maps objects, by name, onto their
+// matching LLVM IR counterparts in the module(s) being compiled.
+class MCJITMemoryManager : public RTDyldMemoryManager {
+ JITMemoryManager *JMM;
+
+ // FIXME: Multiple modules.
+ Module *M;
+public:
+ MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {}
+
+  // Allocate Size bytes, or more, for the named function. Return a pointer
+  // to the allocated memory and update Size to reflect how much memory was
+  // actually allocated.
+ uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
+ // FIXME: This should really reference the MCAsmInfo to get the global
+ // prefix.
+ if (Name[0] == '_') ++Name;
+ Function *F = M->getFunction(Name);
+    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
+    // the symbol and it follows the ObjC naming convention (starts with "-"),
+    // try prepending a \01 and see if we can find it that way.
+ if (!F && Name[0] == '-')
+ F = M->getFunction((Twine("\1") + Name).str());
+ assert(F && "No matching function in JIT IR Module!");
+ return JMM->startFunctionBody(F, Size);
+ }
+
+ // Mark the end of the function, including how much of the allocated
+ // memory was actually used.
+ void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ // FIXME: This should really reference the MCAsmInfo to get the global
+ // prefix.
+ if (Name[0] == '_') ++Name;
+ Function *F = M->getFunction(Name);
+    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
+    // the symbol and it follows the ObjC naming convention (starts with "-"),
+    // try prepending a \01 and see if we can find it that way.
+ if (!F && Name[0] == '-')
+ F = M->getFunction((Twine("\1") + Name).str());
+ assert(F && "No matching function in JIT IR Module!");
+ JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+ }
+
+};
+
+} // End llvm namespace
+
+#endif
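
A sketch of the name normalization used by both methods above, pulled out as a hypothetical string-only helper: strip the assumed '_' global prefix, and for ObjC-style method names (leading '-') also produce a '\1'-prefixed fallback to try. candidateIRNames is not part of the real interface; the real code queries the Module directly with these candidates.

#include <cstdio>
#include <string>

// Produce the IR-level names to try against Module::getFunction for an
// emitted symbol name.
static void candidateIRNames(const char *Name, std::string &First,
                             std::string &Fallback) {
  if (Name[0] == '_')      // Strip the assumed '_' global prefix.
    ++Name;
  First = Name;            // First candidate: the name as-is.
  Fallback.clear();
  if (Name[0] == '-')      // ObjC-style method: also try a \1 prefix.
    Fallback = std::string("\1") + Name;
}

int main() {
  std::string First, Fallback;
  candidateIRNames("_-[MyClass doWork:]", First, Fallback);
  std::printf("first candidate: %s\n", First.c_str());
  if (!Fallback.empty())
    std::printf("fallback has a leading \\1 byte, length %lu\n",
                (unsigned long)Fallback.size());
  return 0;
}
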
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
new file mode 100644
index 000000000000..967efbc0efa4
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCJIT
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
new file mode 100644
index 000000000000..9e53f8757ec0
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMRuntimeDyld
+ RuntimeDyld.cpp
+ )
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile
new file mode 100644
index 000000000000..5d6f26d950fe
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/RuntimeDyld/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMRuntimeDyld
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 000000000000..33dd70502798
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,95 @@
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructors to serve as the key functions.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() {}
+
+namespace llvm {
+
+void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
+ uint8_t *EndAddress) {
+ // Allocate memory for the function via the memory manager.
+ uintptr_t Size = EndAddress - StartAddress + 1;
+ uintptr_t AllocSize = Size;
+ uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
+  assert(AllocSize >= Size &&
+         "Memory manager failed to allocate enough memory!");
+ // Copy the function payload into the memory block.
+ memcpy(Mem, StartAddress, Size);
+ MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
+ // Remember where we put it.
+ Functions[Name] = sys::MemoryBlock(Mem, Size);
+  // Default the assigned address for this symbol to wherever we just
+  // allocated it.
+ SymbolTable[Name] = Mem;
+ DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
+}
+
+// Resolve the relocations for all symbols we currently know about.
+void RuntimeDyldImpl::resolveRelocations() {
+ // Just iterate over the symbols in our symbol table and assign their
+ // addresses.
+ StringMap<uint8_t*>::iterator i = SymbolTable.begin();
+ StringMap<uint8_t*>::iterator e = SymbolTable.end();
+ for (;i != e; ++i)
+ reassignSymbolAddress(i->getKey(), i->getValue());
+}
+
+//===----------------------------------------------------------------------===//
+// RuntimeDyld class implementation
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ Dyld = 0;
+ MM = mm;
+}
+
+RuntimeDyld::~RuntimeDyld() {
+ delete Dyld;
+}
+
+bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+ if (!Dyld) {
+ if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
+ Dyld = new RuntimeDyldMachO(MM);
+ else
+ report_fatal_error("Unknown object format!");
+ } else {
+ if(!Dyld->isCompatibleFormat(InputBuffer))
+ report_fatal_error("Incompatible object format!");
+ }
+
+ return Dyld->loadObject(InputBuffer);
+}
+
+void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+ return Dyld->getSymbolAddress(Name);
+}
+
+void RuntimeDyld::resolveRelocations() {
+ Dyld->resolveRelocations();
+}
+
+void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+ Dyld->reassignSymbolAddress(Name, Addr);
+}
+
+StringRef RuntimeDyld::getErrorString() {
+ return Dyld->getErrorString();
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
new file mode 100644
index 000000000000..bcdfb04801a5
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -0,0 +1,152 @@
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the implementations of runtime dynamic linker facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_IMPL_H
+#define LLVM_RUNTIME_DYLD_IMPL_H
+
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+class RuntimeDyldImpl {
+protected:
+ unsigned CPUType;
+ unsigned CPUSubtype;
+
+ // The MemoryManager to load objects into.
+ RTDyldMemoryManager *MemMgr;
+
+ // FIXME: This all assumes we're dealing with external symbols for anything
+ // explicitly referenced. I.e., we can index by name and things
+ // will work out. In practice, this may not be the case, so we
+ // should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of its instruction data.
+ StringMap<sys::MemoryBlock> Functions;
+
+ // Master symbol table. As modules are loaded and external symbols are
+ // resolved, their addresses are stored here.
+ StringMap<uint8_t*> SymbolTable;
+
+ bool HasError;
+ std::string ErrorStr;
+
+ // Set the error state and record an error string.
+ bool Error(const Twine &Msg) {
+ ErrorStr = Msg.str();
+ HasError = true;
+ return true;
+ }
+
+ void extractFunction(StringRef Name, uint8_t *StartAddress,
+ uint8_t *EndAddress);
+
+public:
+ RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+ virtual ~RuntimeDyldImpl();
+
+ virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+
+ void *getSymbolAddress(StringRef Name) {
+ // FIXME: Just look up as a function for now. Overly simple of course.
+ // Work in progress.
+ return SymbolTable.lookup(Name);
+ }
+
+ void resolveRelocations();
+
+ virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+
+ // Is the linker in an error state?
+ bool hasError() { return HasError; }
+
+ // Mark the error condition as handled and continue.
+ void clearError() { HasError = false; }
+
+ // Get the error message.
+ StringRef getErrorString() { return ErrorStr; }
+
+ virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+};
+
+
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (e.g., because the JIT recompiled the function),
+  // the relocations get re-resolved.
+ struct RelocationEntry {
+ std::string Target; // Object this relocation is contained in.
+ uint64_t Offset; // Offset into the object for the relocation.
+ uint32_t Data; // Second word of the raw macho relocation entry.
+ int64_t Addend; // Addend encoded in the instruction itself, if any.
+ bool isResolved; // Has this relocation been resolved previously?
+
+ RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+ : Target(t), Offset(offset), Data(data), Addend(addend),
+ isResolved(false) {}
+ };
+ typedef SmallVector<RelocationEntry, 4> RelocationList;
+ StringMap<RelocationList> Relocations;
+
+ // FIXME: Also keep a map of all the relocations contained in an object. Use
+ // this to dynamically answer whether all of the relocations in it have
+ // been resolved or not.
+
+ bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+
+ bool loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+ bool loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool loadObject(MemoryBuffer *InputBuffer);
+
+ void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+ static bool isKnownFormat(const MemoryBuffer *InputBuffer);
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+ return isKnownFormat(InputBuffer);
+  }
+};
+
+} // end namespace llvm
+
+
+#endif
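
A simplified model of the Relocations bookkeeping declared above: a map from source symbol to the list of sites that embed its address, re-walked whenever the symbol is assigned a new address. std::map stands in for StringMap, and the sketch only prints the sites that would be patched; the real code rewrites the instruction bytes via resolveRelocation.

#include <cstdio>
#include <map>
#include <string>
#include <vector>
#include <stdint.h>

// One fixup site: the function whose body contains it, and the byte offset
// of the fixup within that function.
struct Reloc {
  std::string Target;
  uint64_t Offset;
};

typedef std::map<std::string, std::vector<Reloc> > RelocMap;

// On reassignment, walk the source symbol's relocation list and re-apply
// each fixup (here we just report what would be patched).
static void reassign(const RelocMap &Relocs, const std::string &Sym,
                     uint64_t NewAddr) {
  RelocMap::const_iterator It = Relocs.find(Sym);
  if (It == Relocs.end())
    return;
  for (size_t i = 0, e = It->second.size(); i != e; ++i)
    std::printf("patch %s+%llu with new address 0x%llx of %s\n",
                It->second[i].Target.c_str(),
                (unsigned long long)It->second[i].Offset,
                (unsigned long long)NewAddr, Sym.c_str());
}

int main() {
  RelocMap Relocs;
  Reloc R;
  R.Target = "caller";       // caller+12 holds the address of "callee".
  R.Offset = 12;
  Relocs["callee"].push_back(R);
  reassign(Relocs, "callee", 0x1000);  // The JIT moved/recompiled "callee".
  return 0;
}
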
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
new file mode 100644
index 000000000000..623e9b2acca3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -0,0 +1,524 @@
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+bool RuntimeDyldMachO::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size) {
+ // This just dispatches to the proper target specific routine.
+ switch (CPUType) {
+ default: assert(0 && "Unsupported CPU type!");
+ case mach::CTM_x86_64:
+ return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ case mach::CTM_ARM:
+ return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ }
+ llvm_unreachable("");
+}
+
+bool RuntimeDyldMachO::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel)
+ // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+ // address. Is that expected? Only for branches, perhaps?
+ Value -= Address + 4;
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Unsigned:
+ case macho::RIT_X86_64_Branch: {
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_X86_64_Signed:
+ case macho::RIT_X86_64_GOTLoad:
+ case macho::RIT_X86_64_GOT:
+ case macho::RIT_X86_64_Subtractor:
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_TLV:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
+
+bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel) {
+ Value -= Address;
+ // ARM PCRel relocations have an effective-PC offset of two instructions
+ // (four bytes in Thumb mode, 8 bytes in ARM mode).
+ // FIXME: For now, assume ARM mode.
+ Value -= 8;
+ }
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ llvm_unreachable("Invalid relocation type!");
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ break;
+ }
+ case macho::RIT_ARM_Branch24Bit: {
+ // Mask the value into the target address. We know instructions are
+ // 32-bit aligned, so we can do it all at once.
+ uint32_t *p = (uint32_t*)Address;
+ // The low two bits of the value are not encoded.
+ Value >>= 2;
+ // Mask the value to 24 bits.
+ Value &= 0xffffff;
+ // FIXME: If the destination is a Thumb function (and the instruction
+ // is a non-predicated BL instruction), we need to change it to a BLX
+ // instruction instead.
+
+ // Insert the value into the instruction.
+ *p = (*p & ~0xffffff) | Value;
+ break;
+ }
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ case macho::RIT_ARM_ThumbBranch32Bit:
+ case macho::RIT_ARM_Half:
+ case macho::RIT_ARM_HalfDifference:
+ case macho::RIT_Pair:
+ case macho::RIT_Difference:
+ case macho::RIT_ARM_LocalDifference:
+ case macho::RIT_ARM_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
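
A worked sketch of the arithmetic in the RIT_ARM_Branch24Bit case above, combined with the PC-relative adjustment done before the switch: the displacement is measured from PC+8, shifted right by two, masked to 24 bits, and merged into the low 24 bits of the branch instruction. The BL opcode word and the addresses are illustrative values, not taken from the JIT.

#include <cassert>
#include <cstdio>
#include <stdint.h>

// Encode an ARM-mode branch displacement the way the code above does.
static uint32_t encodeBranch24(uint32_t Insn, uintptr_t Address,
                               uintptr_t Target) {
  uintptr_t Value = Target - Address;  // PC-relative.
  Value -= 8;                          // ARM-mode effective PC is PC+8.
  Value >>= 2;                         // Instructions are 4-byte aligned.
  Value &= 0xffffff;                   // Keep the 24-bit immediate.
  return (Insn & ~0xffffffu) | (uint32_t)Value;
}

int main() {
  // BL with a zero immediate (condition AL) as the raw instruction word.
  uint32_t BL = 0xEB000000;
  uint32_t Patched = encodeBranch24(BL, /*Address=*/0x1000, /*Target=*/0x1010);
  // Displacement = 0x1010 - 0x1000 - 8 = 8 bytes = 2 instructions.
  assert((Patched & 0xffffff) == 2);
  std::printf("patched instruction: 0x%08x\n", (unsigned)Patched);
  return 0;
}
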
+
+bool RuntimeDyldMachO::
+loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+ Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+ if (!SegmentLC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section> Sect;
+ Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the symbol names, whether defined in this section or not.
+    // Used when we're dealing with relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::SymbolTableEntry> STE;
+ Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > SegmentLC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+ // doesn't require any special handling here.
+ if (STE->Flags != 0x0 && STE->Flags != 0x8)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions, skip the section (odd, but possible).
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+ SegmentLC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+    // Handle the last symbol separately, since its end address is calculated
+    // differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+ // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in the section is almost always going
+ // to be very small (usually just one), so until we have use cases
+ // where that's not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ // FIXME: Some targets (ARM) use internal relocations even for
+ // externally visible symbols, if the definition is in the same
+ // file as the reference. We need to convert those back to by-name
+ // references. We can resolve the address based on the section
+ // offset and see if we have a symbol at that address. If we do,
+ // use that; otherwise, puke.
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+ << " from '" << SourceName << "(Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
+
+
+bool RuntimeDyldMachO::
+loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+ Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+ if (!Segment64LC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section64> Sect;
+ Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the symbol names, whether defined in this section or not.
+    // Used when we're dealing with relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::Symbol64TableEntry> STE;
+ Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > Segment64LC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ if (STE->Flags != 0x0)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions, skip the section (odd, but possible).
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+ Segment64LC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+    // Handle the last symbol separately, since its end address is calculated
+    // differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in the section is almost always
+      // very small (usually just one); until we have use cases where that's
+      // not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "' (Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
+
+bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
+ // If the linker is in an error state, don't do anything.
+ if (hasError())
+ return true;
+ // Load the Mach-O wrapper object.
+ std::string ErrorStr;
+ OwningPtr<MachOObject> Obj(
+ MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
+ if (!Obj)
+ return Error("unable to load object: '" + ErrorStr + "'");
+
+ // Get the CPU type information from the header.
+ const macho::Header &Header = Obj->getHeader();
+
+ // FIXME: Error checking that the loaded object is compatible with
+ // the system we're running on.
+ CPUType = Header.CPUType;
+ CPUSubtype = Header.CPUSubtype;
+
+ // Validate that the load commands match what we expect.
+ const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+ *DysymtabLCI = 0;
+ for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+ const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+ switch (LCI.Command.Type) {
+ case macho::LCT_Segment:
+ case macho::LCT_Segment64:
+ if (SegmentLCI)
+ return Error("unexpected input object (multiple segments)");
+ SegmentLCI = &LCI;
+ break;
+ case macho::LCT_Symtab:
+ if (SymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ SymtabLCI = &LCI;
+ break;
+ case macho::LCT_Dysymtab:
+ if (DysymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ DysymtabLCI = &LCI;
+ break;
+ default:
+      return Error("unexpected input object (unexpected load command)");
+ }
+ }
+
+ if (!SymtabLCI)
+ return Error("no symbol table found in object");
+ if (!SegmentLCI)
+    return Error("no segment load command found in object");
+
+ // Read and register the symbol table data.
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+ Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+ if (!SymtabLC)
+ return Error("unable to load symbol table load command");
+ Obj->RegisterStringTable(*SymtabLC);
+
+ // Read the dynamic link-edit information, if present (not present in static
+ // objects).
+ if (DysymtabLCI) {
+ InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+ Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+ if (!DysymtabLC)
+      return Error("unable to load dynamic link-edit load command");
+
+ // FIXME: We don't support anything interesting yet.
+// if (DysymtabLC->LocalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: local symbol entries");
+// if (DysymtabLC->ExternalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+ }
+
+ // Load the segment load command.
+ if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+ if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ } else {
+ if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ }
+
+ return false;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+ // Assign the address in our symbol table.
+ SymbolTable[Name] = Addr;
+
+ RelocationList &Relocs = Relocations[Name];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry &RE = Relocs[i];
+ uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
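+    // RE.Data is the raw Word1 of the original relocation entry. Assuming
+    // the usual non-scattered relocation_info layout, bit 24 is r_pcrel,
+    // bits 25-26 are r_length (so the access size is 1 << r_length bytes)
+    // and bits 28-31 are r_type, which is what the extractions below rely on.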
+ bool isPCRel = (RE.Data >> 24) & 1;
+ unsigned Type = (RE.Data >> 28) & 0xf;
+ unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+ DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+ << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+                 << " from '" << Name << "' (" << format("%p", Addr) << ")"
+                 << " (" << (isPCRel ? "pcrel" : "absolute")
+ << ", type: " << Type << ", Size: " << Size << ").\n");
+
+ resolveRelocation(Target, Addr, isPCRel, Type, Size);
+ RE.isResolved = true;
+ }
+}
+
+bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+ StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
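+  // These are the raw Mach-O header magics: FEEDFACE/CEFAEDFE for 32-bit
+  // images and FEEDFACF/CFFAEDFE for 64-bit images, covering both byte
+  // orders in which the header may appear.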
+ if (Magic == "\xFE\xED\xFA\xCE") return true;
+ if (Magic == "\xCE\xFA\xED\xFE") return true;
+ if (Magic == "\xFE\xED\xFA\xCF") return true;
+ if (Magic == "\xCF\xFA\xED\xFE") return true;
+ return false;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
new file mode 100644
index 000000000000..f51aff3603b8
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -0,0 +1,90 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x. Clients
+// should initialize targets prior to calling createJIT.
+//
+//===----------------------------------------------------------------------===//
+
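+// A typical client calls something like llvm::InitializeNativeTarget()
+// before constructing the EngineBuilder, so that the registry consulted
+// below has at least the host target registered.
+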
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch. Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs,
+ std::string *ErrorStr) {
+ Triple TheTriple(Mod->getTargetTriple());
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ // Adjust the triple to match what the user requested.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ *ErrorStr = "No available targets are compatible with this -march, "
+ "see -version for the available targets.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // module/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
+ std::string Error;
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+ if (TheTarget == 0) {
+ if (ErrorStr)
+ *ErrorStr = Error;
+ return 0;
+ }
+ }
+
+ if (!TheTarget->hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host you are"
+           << " running on. If bad things happen, please choose a different "
+ << "-march switch.\n";
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (!MAttrs.empty()) {
+ SubtargetFeatures Features;
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
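+  // The resulting string is SubtargetFeatures' comma-separated "+attr" form,
+  // e.g. something like "+sse2,+cmov" for an x86 host.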
+
+ // Allocate a target...
+ TargetMachine *Target =
+ TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr);
+ assert(Target && "Could not allocate target machine!");
+ return Target;
+}
diff --git a/contrib/llvm/lib/Linker/LinkArchives.cpp b/contrib/llvm/lib/Linker/LinkArchives.cpp
new file mode 100644
index 000000000000..2c4ed7fdc17a
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkArchives.cpp
@@ -0,0 +1,198 @@
+//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Config/config.h"
+#include <memory>
+#include <set>
+using namespace llvm;
+
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat it as resolved).
+///
+/// Inputs:
+/// M - The module in which to find undefined symbols.
+///
+/// Outputs:
+/// UndefinedSymbols - A set of C++ strings containing the name of all
+/// undefined symbols.
+///
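+/// For example, a module that declares printf but never defines it (and
+/// that lacks a main) would come back with both "printf" and "main" in
+/// UndefinedSymbols.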
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+ std::set<std::string> DefinedSymbols;
+ UndefinedSymbols.clear();
+
+ // If the program doesn't define a main, try pulling one in from a .a file.
+ // This is needed for programs where the main function is defined in an
+  // archive, such as f2c'd programs.
+ Function *Main = M->getFunction("main");
+ if (Main == 0 || Main->isDeclaration())
+ UndefinedSymbols.insert("main");
+
+ for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ if (I->hasName())
+ DefinedSymbols.insert(I->getName());
+
+ // Prune out any defined symbols from the undefined symbols set...
+ for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+ I != UndefinedSymbols.end(); )
+ if (DefinedSymbols.count(*I))
+ UndefinedSymbols.erase(I++); // This symbol really is defined!
+ else
+ ++I; // Keep this symbol in the undefined symbols list
+}
+
+/// LinkInArchive - opens an archive library and links in all objects that
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+/// Filename - The pathname of the archive.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
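+///
+/// The implementation loops: it asks the archive for modules defining the
+/// currently undefined symbols, links those modules in, recomputes the
+/// undefined set, and stops once a pass resolves nothing new.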
+bool
+Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
+ // Make sure this is an archive file we're dealing with
+ if (!Filename.isArchive())
+ return error("File '" + Filename.str() + "' is not an archive.");
+
+ // Open the archive file
+ verbose("Linking archive file '" + Filename.str() + "'");
+
+ // Find all of the symbols currently undefined in the bitcode program.
+ // If all the symbols are defined, the program is complete, and there is
+ // no reason to link in any archive files.
+ std::set<std::string> UndefinedSymbols;
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ if (UndefinedSymbols.empty()) {
+ verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
+ return false; // No need to link anything in!
+ }
+
+ std::string ErrMsg;
+ std::auto_ptr<Archive> AutoArch (
+ Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg));
+
+ Archive* arch = AutoArch.get();
+
+ if (!arch)
+ return error("Cannot read archive '" + Filename.str() +
+ "': " + ErrMsg);
+ if (!arch->isBitcodeArchive()) {
+ is_native = true;
+ return false;
+ }
+ is_native = false;
+
+ // Save a set of symbols that are not defined by the archive. Since we're
+ // entering a loop, there's no point searching for these multiple times. This
+ // variable is used to "set_subtract" from the set of undefined symbols.
+ std::set<std::string> NotDefinedByArchive;
+
+ // Save the current set of undefined symbols, because we may have to make
+ // multiple passes over the archive:
+ std::set<std::string> CurrentlyUndefinedSymbols;
+
+ do {
+ CurrentlyUndefinedSymbols = UndefinedSymbols;
+
+ // Find the modules we need to link into the target module. Note that arch
+ // keeps ownership of these modules and may return the same Module* from a
+ // subsequent call.
+ std::set<Module*> Modules;
+ if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
+ return error("Cannot find symbols in '" + Filename.str() +
+ "': " + ErrMsg);
+
+ // If we didn't find any more modules to link this time, we are done
+ // searching this archive.
+ if (Modules.empty())
+ break;
+
+ // Any symbols remaining in UndefinedSymbols after
+ // findModulesDefiningSymbols are ones that the archive does not define. So
+ // we add them to the NotDefinedByArchive variable now.
+ NotDefinedByArchive.insert(UndefinedSymbols.begin(),
+ UndefinedSymbols.end());
+
+ // Loop over all the Modules that we got back from the archive
+ for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end();
+ I != E; ++I) {
+
+ // Get the module we must link in.
+ std::string moduleErrorMsg;
+ Module* aModule = *I;
+ if (aModule != NULL) {
+ if (aModule->MaterializeAll(&moduleErrorMsg))
+ return error("Could not load a module: " + moduleErrorMsg);
+
+ verbose(" Linking in module: " + aModule->getModuleIdentifier());
+
+ // Link it in
+ if (LinkInModule(aModule, &moduleErrorMsg))
+ return error("Cannot link in module '" +
+ aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
+ }
+ }
+
+ // Get the undefined symbols from the aggregate module. This recomputes the
+ // symbols we still need after the new modules have been linked in.
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ // At this point we have two sets of undefined symbols: UndefinedSymbols
+ // which holds the undefined symbols from all the modules, and
+ // NotDefinedByArchive which holds symbols we know the archive doesn't
+ // define. There's no point searching for symbols that we won't find in the
+ // archive so we subtract these sets.
+ set_subtract(UndefinedSymbols, NotDefinedByArchive);
+
+ // If there's no symbols left, no point in continuing to search the
+ // archive.
+ if (UndefinedSymbols.empty())
+ break;
+ } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/LinkItems.cpp b/contrib/llvm/lib/Linker/LinkItems.cpp
new file mode 100644
index 000000000000..52a0d175a5cd
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkItems.cpp
@@ -0,0 +1,241 @@
+//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+// LinkItems - This function is the main entry point into linking. It takes a
+// list of LinkItems which indicates the order the files should be linked and
+// how each file should be treated (plain file or with library search). The
+// function only links bitcode and produces a result list of items that are
+// native objects.
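+//
+// Each LinkItem is essentially a (name, is-library) pair: library items are
+// resolved by name through LinkInLibrary, while plain items are treated as
+// file paths and handed to LinkInFile.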
+bool
+Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
+ // Clear the NativeItems just in case
+ NativeItems.clear();
+
+ // For each linkage item ...
+ for (ItemList::const_iterator I = Items.begin(), E = Items.end();
+ I != E; ++I) {
+ if (I->second) {
+ // Link in the library suggested.
+ bool is_native = false;
+ if (LinkInLibrary(I->first, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ } else {
+ // Link in the file suggested
+ bool is_native = false;
+ if (LinkInFile(sys::Path(I->first), is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ }
+ }
+
+ // At this point we have processed all the link items provided to us. Since
+ // we have an aggregated module at this point, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ bool is_native;
+ for (Module::lib_iterator I = Composite->lib_begin(),
+ E = Composite->lib_end(); I != E; ++I) {
+ if(LinkInLibrary(*I, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(std::make_pair(*I, true));
+ }
+
+ return false;
+}
+
+
+/// LinkInLibrary - links one library into the composite module.
+///
+bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
+ is_native = false;
+ // Determine where this library lives.
+ sys::Path Pathname = FindLib(Lib);
+ if (Pathname.isEmpty())
+ return error("Cannot find library '" + Lib.str() + "'");
+
+ // If its an archive, try to link it in
+ std::string Magic;
+ Pathname.getMagicNumber(Magic, 64);
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: llvm_unreachable("Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Supposed library '" + Lib.str() + "' isn't a library.");
+
+ case sys::Bitcode_FileType:
+ // LLVM ".so" file.
+ if (LinkInFile(Pathname, is_native))
+ return true;
+ break;
+
+ case sys::Archive_FileType:
+ if (LinkInArchive(Pathname, is_native))
+ return error("Cannot link archive '" + Pathname.str() + "'");
+ break;
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
+
+/// LinkInLibraries - takes the specified library files and links them into the
+/// main bitcode object file.
+///
+/// Inputs:
+/// Libraries - The list of libraries to link into the module.
+///
+/// Return value:
+/// FALSE - No error.
+/// TRUE - Error.
+///
+bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
+
+ // Process the set of libraries we've been provided.
+ bool is_native = false;
+ for (unsigned i = 0; i < Libraries.size(); ++i)
+ if (LinkInLibrary(Libraries[i], is_native))
+ return true;
+
+ // At this point we have processed all the libraries provided to us. Since
+ // we have an aggregated module at this point, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ const Module::LibraryListType& DepLibs = Composite->getLibraries();
+ for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
+ E = DepLibs.end(); I != E; ++I)
+ if (LinkInLibrary(*I, is_native))
+ return true;
+
+ return false;
+}
+
+/// LinkInFile - opens a bitcode file and links in all objects which
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+/// File - The pathname of the bitcode file.
+///
+/// Outputs:
+/// ErrorMessage - A C++ string detailing what error occurred, if any.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
+///
+bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
+ is_native = false;
+
+ // Check for a file of name "-", which means "read standard input"
+ if (File.str() == "-") {
+ std::auto_ptr<Module> M;
+ OwningPtr<MemoryBuffer> Buffer;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
+ if (!Buffer->getBufferSize()) {
+ Error = "standard input is empty";
+ } else {
+ M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
+ if (M.get())
+ if (!LinkInModule(M.get(), &Error))
+ return false;
+ }
+ }
+ return error("Cannot link stdin: " + ec.message());
+ }
+
+ // Determine what variety of file it is.
+ std::string Magic;
+ if (!File.getMagicNumber(Magic, 64))
+ return error("Cannot find linker input '" + File.str() + "'");
+
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: llvm_unreachable("Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Ignoring file '" + File.str() +
+                   "' because it does not contain bitcode.");
+
+ case sys::Archive_FileType:
+ // A user may specify an ar archive without -l, perhaps because it
+ // is not installed as a library. Detect that and link the archive.
+ if (LinkInArchive(File, is_native))
+ return true;
+ break;
+
+ case sys::Bitcode_FileType: {
+ verbose("Linking bitcode file '" + File.str() + "'");
+ std::auto_ptr<Module> M(LoadObject(File));
+ if (M.get() == 0)
+ return error("Cannot load file '" + File.str() + "': " + Error);
+ if (LinkInModule(M.get(), &Error))
+ return error("Cannot link file '" + File.str() + "': " + Error);
+
+ verbose("Linked in file '" + File.str() + "'");
+ break;
+ }
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
+
+/// LinkInFiles - takes a module and a list of files and links them all together.
+/// It locates the file either in the current directory, as its absolute
+/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH.
+///
+/// Inputs:
+/// Files - A vector of sys::Path indicating the LLVM bitcode filenames
+/// to be linked. The names can refer to a mixture of pure LLVM
+/// bitcode files and archive (ar) formatted files.
+///
+/// Return value:
+/// FALSE - No errors.
+/// TRUE - Some error occurred.
+///
+bool Linker::LinkInFiles(const std::vector<sys::Path> &Files) {
+ bool is_native;
+ for (unsigned i = 0; i < Files.size(); ++i)
+ if (LinkInFile(Files[i], is_native))
+ return true;
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
new file mode 100644
index 000000000000..55aa9bf18887
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -0,0 +1,968 @@
+//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM module linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TypeMapTy : public ValueMapTypeRemapper {
+ /// MappedTypes - This is a mapping from a source type to a destination type
+ /// to use.
+ DenseMap<Type*, Type*> MappedTypes;
+
+ /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
+ /// we speculatively add types to MappedTypes, but keep track of them here in
+ /// case we need to roll back.
+ SmallVector<Type*, 16> SpeculativeTypes;
+
+ /// DefinitionsToResolve - This is a list of non-opaque structs in the source
+ /// module that are mapped to an opaque struct in the destination module.
+ SmallVector<StructType*, 16> DefinitionsToResolve;
+public:
+
+ /// addTypeMapping - Indicate that the specified type in the destination
+ /// module is conceptually equivalent to the specified type in the source
+ /// module.
+ void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+ /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+ /// module from a type definition in the source module.
+ void linkDefinedTypeBodies();
+
+ /// get - Return the mapped type to use for the specified input type from the
+ /// source module.
+ Type *get(Type *SrcTy);
+
+ FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+
+private:
+ Type *getImpl(Type *T);
+ /// remapType - Implement the ValueMapTypeRemapper interface.
+ Type *remapType(Type *SrcTy) {
+ return get(SrcTy);
+ }
+
+ bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
+
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry) return;
+
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return;
+ }
+
+ // Check to see if these types are recursively isomorphic and establish a
+ // mapping between them if so.
+ if (!areTypesIsomorphic(DstTy, SrcTy)) {
+ // Oops, they aren't isomorphic. Just discard this request by rolling out
+ // any speculative mappings we've established.
+ for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i)
+ MappedTypes.erase(SpeculativeTypes[i]);
+ }
+ SpeculativeTypes.clear();
+}
+
+/// areTypesIsomorphic - Recursively walk this pair of types, returning true
+/// if they are isomorphic, false if they are not.
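+///
+/// Recursive types work because SrcTy -> DstTy is recorded speculatively in
+/// MappedTypes before the subelements are visited; e.g. matching
+/// "%a = type { %a*, i32 }" against "%b = type { %b*, i32 }" terminates when
+/// the self-referential pointer hits that speculative entry.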
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+ // Two types with differing kinds are clearly not isomorphic.
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
+
+ // If we have an entry in the MappedTypes table, then we have our answer.
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry)
+ return Entry == DstTy;
+
+ // Two identical types are clearly isomorphic. Remember this
+ // non-speculatively.
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return true;
+ }
+
+ // Okay, we have two types with identical kinds that we haven't seen before.
+
+ // If this is an opaque struct type, special case it.
+ if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+ // Mapping an opaque type to any struct, just keep the dest struct.
+ if (SSTy->isOpaque()) {
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+ return true;
+ }
+
+ // Mapping a non-opaque source type to an opaque dest. Keep the dest, but
+ // fill it in later. This doesn't need to be speculative.
+ if (cast<StructType>(DstTy)->isOpaque()) {
+ Entry = DstTy;
+ DefinitionsToResolve.push_back(SSTy);
+ return true;
+ }
+ }
+
+ // If the number of subtypes disagree between the two types, then we fail.
+ if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+ return false;
+
+ // Fail if any of the extra properties (e.g. array size) of the type disagree.
+ if (isa<IntegerType>(DstTy))
+ return false; // bitwidth disagrees.
+ if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+ if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+ return false;
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+ if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+ return false;
+ } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+ StructType *SSTy = cast<StructType>(SrcTy);
+ if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+ DSTy->isPacked() != SSTy->isPacked())
+ return false;
+ } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+ if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ return false;
+ } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ return false;
+ }
+
+ // Otherwise, we speculate that these two types will line up and recursively
+ // check the subelements.
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+
+ for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+ if (!areTypesIsomorphic(DstTy->getContainedType(i),
+ SrcTy->getContainedType(i)))
+ return false;
+
+ // If everything seems to have lined up, then everything is great.
+ return true;
+}
+
+/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+/// module from a type definition in the source module.
+void TypeMapTy::linkDefinedTypeBodies() {
+ SmallVector<Type*, 16> Elements;
+ SmallString<16> TmpName;
+
+ // Note that processing entries in this loop (calling 'get') can add new
+ // entries to the DefinitionsToResolve vector.
+ while (!DefinitionsToResolve.empty()) {
+ StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+ StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+
+ // TypeMap is a many-to-one mapping, if there were multiple types that
+ // provide a body for DstSTy then previous iterations of this loop may have
+ // already handled it. Just ignore this case.
+ if (!DstSTy->isOpaque()) continue;
+ assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+
+ // Map the body of the source type over to a new body for the dest type.
+ Elements.resize(SrcSTy->getNumElements());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ Elements[i] = getImpl(SrcSTy->getElementType(i));
+
+ DstSTy->setBody(Elements, SrcSTy->isPacked());
+
+    // If DstSTy has no name or has a longer name than SrcSTy, then viciously
+    // steal SrcSTy's name.
+ if (!SrcSTy->hasName()) continue;
+ StringRef SrcName = SrcSTy->getName();
+
+ if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
+ TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
+ SrcSTy->setName("");
+ DstSTy->setName(TmpName.str());
+ TmpName.clear();
+ }
+ }
+}
+
+
+/// get - Return the mapped type to use for the specified input type from the
+/// source module.
+Type *TypeMapTy::get(Type *Ty) {
+ Type *Result = getImpl(Ty);
+
+ // If this caused a reference to any struct type, resolve it before returning.
+ if (!DefinitionsToResolve.empty())
+ linkDefinedTypeBodies();
+ return Result;
+}
+
+/// getImpl - This is the recursive version of get().
+Type *TypeMapTy::getImpl(Type *Ty) {
+ // If we already have an entry for this type, return it.
+ Type **Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If this is not a named struct type, then just map all of the elements and
+ // then rebuild the type from inside out.
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) {
+    // If there are no element types to map, then the type maps to itself.
+    // This is true for the anonymous {} struct, things like 'float',
+    // integers, etc.
+ if (Ty->getNumContainedTypes() == 0)
+ return *Entry = Ty;
+
+ // Remap all of the elements, keeping track of whether any of them change.
+ bool AnyChange = false;
+ SmallVector<Type*, 4> ElementTypes;
+ ElementTypes.resize(Ty->getNumContainedTypes());
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) {
+ ElementTypes[i] = getImpl(Ty->getContainedType(i));
+ AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
+ }
+
+ // If we found our type while recursively processing stuff, just use it.
+ Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If all of the element types mapped directly over, then the type is usable
+ // as-is.
+ if (!AnyChange)
+ return *Entry = Ty;
+
+ // Otherwise, rebuild a modified type.
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "unknown derived type to remap");
+ case Type::ArrayTyID:
+ return *Entry = ArrayType::get(ElementTypes[0],
+ cast<ArrayType>(Ty)->getNumElements());
+ case Type::VectorTyID:
+ return *Entry = VectorType::get(ElementTypes[0],
+ cast<VectorType>(Ty)->getNumElements());
+ case Type::PointerTyID:
+ return *Entry = PointerType::get(ElementTypes[0],
+ cast<PointerType>(Ty)->getAddressSpace());
+ case Type::FunctionTyID:
+ return *Entry = FunctionType::get(ElementTypes[0],
+ ArrayRef<Type*>(ElementTypes).slice(1),
+ cast<FunctionType>(Ty)->isVarArg());
+ case Type::StructTyID:
+ // Note that this is only reached for anonymous structs.
+ return *Entry = StructType::get(Ty->getContext(), ElementTypes,
+ cast<StructType>(Ty)->isPacked());
+ }
+ }
+
+ // Otherwise, this is an unmapped named struct. If the struct can be directly
+ // mapped over, just use it as-is. This happens in a case when the linked-in
+ // module has something like:
+ // %T = type {%T*, i32}
+ // @GV = global %T* null
+ // where T does not exist at all in the destination module.
+ //
+ // The other case we watch for is when the type is not in the destination
+  // module, but has to be rebuilt because it refers to something that
+ // is already mapped. For example, if the destination module has:
+ // %A = type { i32 }
+ // and the source module has something like
+ // %A' = type { i32 }
+ // %B = type { %A'* }
+ // @GV = global %B* null
+ // then we want to create a new type: "%B = type { %A*}" and have it take the
+ // pristine "%B" name from the source module.
+ //
+ // To determine which case this is, we have to recursively walk the type graph
+ // speculating that we'll be able to reuse it unmodified. Only if this is
+ // safe would we map the entire thing over. Because this is an optimization,
+ // and is not required for the prettiness of the linked module, we just skip
+ // it and always rebuild a type here.
+ StructType *STy = cast<StructType>(Ty);
+
+ // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque())
+ return *Entry = STy;
+
+ // Otherwise we create a new type and resolve its body later. This will be
+ // resolved by the top level of get().
+ DefinitionsToResolve.push_back(STy);
+ return *Entry = StructType::createNamed(STy->getContext(), "");
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// ModuleLinker - This is an implementation class for the LinkModules
+ /// function, which is the entrypoint for this file.
+ class ModuleLinker {
+ Module *DstM, *SrcM;
+
+ TypeMapTy TypeMap;
+
+ /// ValueMap - Mapping of values from what they used to be in Src, to what
+ /// they are now in DstM. ValueToValueMapTy is a ValueMap, which involves
+ /// some overhead due to the use of Value handles which the Linker doesn't
+ /// actually need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
+
+ struct AppendingVarInfo {
+ GlobalVariable *NewGV; // New aggregate global in dest module.
+ Constant *DstInit; // Old initializer from dest module.
+ Constant *SrcInit; // Old initializer from src module.
+ };
+
+ std::vector<AppendingVarInfo> AppendingVars;
+
+ public:
+ std::string ErrorMsg;
+
+ ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+
+ bool run();
+
+ private:
+ /// emitError - Helper method for setting a message and returning an error
+ /// code.
+ bool emitError(const Twine &Message) {
+ ErrorMsg = Message.str();
+ return true;
+ }
+
+ /// getLinkageResult - This analyzes the two global values and determines
+ /// what the result will look like in the destination module.
+ bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+
+ /// getLinkedToGlobal - Given a global in the source module, return the
+ /// global in the destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+ // If the source has no name it can't link. If it has local linkage,
+ // there is no name match-up going on.
+ if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise see if we have a match in the destination module's symtab.
+ GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+ if (DGV == 0) return 0;
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise, we do in fact link to the destination global.
+ return DGV;
+ }
+
+ void computeTypeMapping();
+
+ bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+ bool linkGlobalProto(GlobalVariable *SrcGV);
+ bool linkFunctionProto(Function *SrcF);
+ bool linkAliasProto(GlobalAlias *SrcA);
+
+ void linkAppendingVarInit(const AppendingVarInfo &AVI);
+ void linkGlobalInits();
+ void linkFunctionBody(Function *Dst, Function *Src);
+ void linkAliasBodies();
+ void linkNamedMDNodes();
+ };
+}
+
+
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+/// in the symbol table. This is good for all clients except for us. Go
+/// through the trouble to force this back.
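+///
+/// For example, if the destination module already holds an unrelated "foo",
+/// a newly created global that asked for "foo" gets a uniqued name instead;
+/// the swap below hands "foo" back to the global we care about and leaves
+/// the conflicting value with a freshly uniqued name.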
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+ // If the global doesn't force its name or if it already has the right name,
+ // there is nothing for us to do.
+ if (GV->hasLocalLinkage() || GV->getName() == Name)
+ return;
+
+ Module *M = GV->getParent();
+
+ // If there is a conflict, rename the conflict.
+ if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+ GV->takeName(ConflictGV);
+ ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+ assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+ } else {
+ GV->setName(Name); // Force the name back
+ }
+}
+
+/// CopyGVAttributes - copy additional attributes (those not needed to construct
+/// a GlobalValue) from the SrcGV to the DestGV.
+static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+ // Use the maximum alignment, rather than just copying the alignment of SrcGV.
+ unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
+ DestGV->copyAttributesFrom(SrcGV);
+ DestGV->setAlignment(Alignment);
+
+ forceRenaming(DestGV, SrcGV->getName());
+}
+
+/// getLinkageResult - This analyzes the two global values and determines what
+/// the result will look like in the destination module. In particular, it
+/// computes the resultant linkage type, computes whether the global in the
+/// source should be copied over to the destination (replacing the existing
+/// one), and computes whether this linkage is an error or not. It also performs
+/// visibility checks: we cannot link together two symbols with different
+/// visibilities.
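+///
+/// Roughly: a definition beats a declaration, a strong definition beats a
+/// weak one, and two strong definitions of the same symbol are reported as
+/// multiply defined.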
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT,
+ bool &LinkFromSrc) {
+ assert(Dest && "Must have two globals being queried");
+ assert(!Src->hasLocalLinkage() &&
+ "If Src has internal linkage, Dest shouldn't be set!");
+
+ bool SrcIsDeclaration = Src->isDeclaration();
+ bool DestIsDeclaration = Dest->isDeclaration();
+
+ if (SrcIsDeclaration) {
+    // If Src is external or if both Src & Dest are external, just link the
+    // external globals; we aren't adding anything.
+ if (Src->hasDLLImportLinkage()) {
+ // If one of GVs has DLLImport linkage, result should be dllimport'ed.
+ if (DestIsDeclaration) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ }
+ } else if (Dest->hasExternalWeakLinkage()) {
+ // If the Dest is weak, use the source linkage.
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) {
+ // If Dest is external but Src is not:
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else if (Src->isWeakForLinker()) {
+ // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+ // or DLL* linkage.
+ if (Dest->hasExternalWeakLinkage() ||
+ Dest->hasAvailableExternallyLinkage() ||
+ (Dest->hasLinkOnceLinkage() &&
+ (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (Dest->isWeakForLinker()) {
+ // At this point we know that Src has External* or DLL* linkage.
+ if (Src->hasExternalWeakLinkage()) {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ } else {
+ LinkFromSrc = true;
+ LT = GlobalValue::ExternalLinkage;
+ }
+ } else {
+ assert((Dest->hasExternalLinkage() || Dest->hasDLLImportLinkage() ||
+ Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) &&
+ (Src->hasExternalLinkage() || Src->hasDLLImportLinkage() ||
+ Src->hasDLLExportLinkage() || Src->hasExternalWeakLinkage()) &&
+ "Unexpected linkage type!");
+ return emitError("Linking globals named '" + Src->getName() +
+ "': symbol multiply defined!");
+ }
+
+ // Check visibility
+ if (Src->getVisibility() != Dest->getVisibility() &&
+ !SrcIsDeclaration && !DestIsDeclaration &&
+ !Src->hasAvailableExternallyLinkage() &&
+ !Dest->hasAvailableExternallyLinkage())
+ return emitError("Linking globals named '" + Src->getName() +
+ "': symbols have different visibilities!");
+ return false;
+}
+
+/// computeTypeMapping - Loop over all of the linked values to compute type
+/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then
+/// we have two struct types 'Foo' but one got renamed when the module was
+/// loaded into the same LLVMContext.
+void ModuleLinker::computeTypeMapping() {
+ // Incorporate globals.
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ GlobalValue *DGV = getLinkedToGlobal(I);
+ if (DGV == 0) continue;
+
+ if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ continue;
+ }
+
+ // Unify the element type of appending arrays.
+ ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+ ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+ TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+ }
+
+ // Incorporate functions.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+ if (GlobalValue *DGV = getLinkedToGlobal(I))
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ }
+
+ // Don't bother incorporating aliases, they aren't generally typed well.
+
+ // Now that we have discovered all of the type equivalences, get a body for
+ // any 'opaque' types in the dest module that are now resolved.
+ TypeMap.linkDefinedTypeBodies();
+}
+
+/// linkAppendingVarProto - If there were any appending global variables, link
+/// them together now. Return true on error.
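+///
+/// Appending linkage is what arrays such as @llvm.global_ctors use: the two
+/// arrays are concatenated into a single new global of the combined length.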
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+ GlobalVariable *SrcGV) {
+
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+ return emitError("Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending global!");
+
+ ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+ ArrayType *SrcTy =
+ cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+ Type *EltTy = DstTy->getElementType();
+
+  // Check to see that the two arrays agree on type.
+ if (EltTy != SrcTy->getElementType())
+ return emitError("Appending variables with different element types!");
+ if (DstGV->isConstant() != SrcGV->isConstant())
+ return emitError("Appending variables linked with different const'ness!");
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment())
+ return emitError(
+        "Appending variables with different alignment cannot be linked!");
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility())
+ return emitError(
+        "Appending variables with different visibility cannot be linked!");
+
+ if (DstGV->getSection() != SrcGV->getSection())
+ return emitError(
+        "Appending variables with different section names cannot be linked!");
+
+ uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
+ ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+ // Create the new global variable.
+ GlobalVariable *NG =
+ new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
+ DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+ DstGV->isThreadLocal(),
+ DstGV->getType()->getAddressSpace());
+
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NG, DstGV);
+
+ AppendingVarInfo AVI;
+ AVI.NewGV = NG;
+ AVI.DstInit = DstGV->getInitializer();
+ AVI.SrcInit = SrcGV->getInitializer();
+ AppendingVars.push_back(AVI);
+
+ // Replace any uses of the two global variables with uses of the new
+ // global.
+ ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+ DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+ DstGV->eraseFromParent();
+
+ // Zap the initializer in the source variable so we don't try to link it.
+ SrcGV->setInitializer(0);
+ SrcGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
+}
+
+/// linkGlobalProto - Loop through the global variables in the src module and
+/// merge them into the dest module.
+bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+ GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+ if (DGV) {
+ // Concatenation of appending linkage variables is magic and handled later.
+ if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+
+ // Determine whether linkage of these two globals follows the source
+ // module's definition or the destination module's definition.
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
+ return true;
+
+ // If we're not linking from the source, then keep the definition that we
+ // have.
+ if (!LinkFromSrc) {
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
+
+ // Destroy the source global's initializer (and convert it to a prototype)
+ // so that we don't attempt to copy it over when processing global
+ // initializers.
+ SGV->setInitializer(0);
+ SGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
+ }
+ }
+
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by LinkGlobalInits.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ SGV->getName(), /*insertbefore*/0,
+ SGV->isThreadLocal(),
+ SGV->getType()->getAddressSpace());
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NewDGV, SGV);
+
+ if (DGV) {
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
+ return false;
+}
+
+/// linkFunctionProto - Link the function in the source module into the
+/// destination module if needed, setting up mapping information.
+bool ModuleLinker::linkFunctionProto(Function *SF) {
+ GlobalValue *DGV = getLinkedToGlobal(SF);
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+ return true;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+
+ // Remove the body from the source module so we don't attempt to remap it.
+ SF->deleteBody();
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we are linking from the source,
+ // bring SF over.
+ Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+ SF->getLinkage(), SF->getName(), DstM);
+ CopyGVAttributes(NewDF, SF);
+
+ if (DGV) {
+ // Any uses of DF need to change to NewDF, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SF] = NewDF;
+ return false;
+}
+
+/// linkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module.
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
+ GlobalValue *DGV = getLinkedToGlobal(SGA);
+
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
+ return true;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
+
+ // Remove the body from the source module so we don't attempt to remap it.
+ SGA->setAliasee(0);
+ return false;
+ }
+ }
+
+ // If there is no linkage to be performed or we're linking from the source,
+ // bring over SGA.
+ GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
+ SGA->getLinkage(), SGA->getName(),
+ /*aliasee*/0, DstM);
+ CopyGVAttributes(NewDA, SGA);
+
+ if (DGV) {
+ // Any uses of DGV need to change to NewDA, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SGA] = NewDA;
+ return false;
+}
+
+void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
+ // Merge the initializer.
+ SmallVector<Constant*, 16> Elements;
+ if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(AVI.DstInit));
+ ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
+ Type *EltTy = DstAT->getElementType();
+ Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+
+ Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+ if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(SrcInit));
+ ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
+ Type *EltTy = SrcAT->getElementType();
+ Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+ ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
+ AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+}
+
+
+// linkGlobalInits - Update the initializers in the Dest module now that all
+// globals that may be referenced are in Dest.
+void ModuleLinker::linkGlobalInits() {
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ if (!I->hasInitializer()) continue; // Only process initialized GV's.
+
+ // Grab destination global variable.
+ GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
+ // Figure out what the initializer looks like in the dest module.
+ DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+// linkFunctionBody - Copy the source function over into the dest function and
+// fix up references to values. At this point we know that Dest is an external
+// function, and that Src is not.
+void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
+ assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
+
+ // Go through and convert function arguments over, remembering the mapping.
+ Function::arg_iterator DI = Dst->arg_begin();
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I, ++DI) {
+ DI->setName(I->getName()); // Copy the name over.
+
+ // Add a mapping to our mapping.
+ ValueMap[I] = DI;
+ }
+
+ // Splice the body of the source function into the dest function.
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
+
+ // There is no need to map the arguments anymore.
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I)
+ ValueMap.erase(I);
+}
+
+
+void ModuleLinker::linkAliasBodies() {
+ for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
+ I != E; ++I)
+ if (Constant *Aliasee = I->getAliasee()) {
+ GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
+ DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
+ }
+}
+
+/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// module.
+void ModuleLinker::linkNamedMDNodes() {
+ for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
+ E = SrcM->named_metadata_end(); I != E; ++I) {
+ NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
+ RF_None, &TypeMap));
+ }
+}
+
+bool ModuleLinker::run() {
+ assert(DstM && "Null Destination module");
+ assert(SrcM && "Null Source Module");
+
+ // Inherit the target data from the source module if the destination module
+ // doesn't have one already.
+ if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty())
+ DstM->setDataLayout(SrcM->getDataLayout());
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM->setTargetTriple(SrcM->getTargetTriple());
+
+ if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() &&
+ SrcM->getDataLayout() != DstM->getDataLayout())
+ errs() << "WARNING: Linking two modules of different data layouts!\n";
+ if (!SrcM->getTargetTriple().empty() &&
+ DstM->getTargetTriple() != SrcM->getTargetTriple()) {
+ errs() << "WARNING: Linking two modules of different target triples: ";
+ if (!SrcM->getModuleIdentifier().empty())
+ errs() << SrcM->getModuleIdentifier() << ": ";
+ errs() << "'" << SrcM->getTargetTriple() << "' and '"
+ << DstM->getTargetTriple() << "'\n";
+ }
+
+ // Append the module inline asm string.
+ if (!SrcM->getModuleInlineAsm().empty()) {
+ if (DstM->getModuleInlineAsm().empty())
+ DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
+ else
+ DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
+ SrcM->getModuleInlineAsm());
+ }
+
+ // Update the destination module's dependent libraries list with the libraries
+ // from the source module. There's no opportunity for duplicates here as the
+ // Module ensures that duplicate insertions are discarded.
+ for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
+ SI != SE; ++SI)
+ DstM->addLibrary(*SI);
+
+ // If the source library's module id is in the dependent library list of the
+ // destination library, remove it since that module is now linked in.
+ StringRef ModuleId = SrcM->getModuleIdentifier();
+ if (!ModuleId.empty())
+ DstM->removeLibrary(sys::path::stem(ModuleId));
+
+
+ // Loop over all of the linked values to compute type mappings.
+ computeTypeMapping();
+
+ // Remap all of the named mdnodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ linkNamedMDNodes();
+
+ // Insert all of the globals in src into the DstM module... without linking
+ // initializers (which could refer to functions not yet mapped over).
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I)
+ if (linkGlobalProto(I))
+ return true;
+
+ // Link the functions together between the two modules, without doing function
+ // bodies... this just adds external function prototypes to DstM. We do this
+ // so that when we begin processing function bodies, all of the global values
+ // that may be referenced are available in our ValueMap.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+ if (linkFunctionProto(I))
+ return true;
+
+ // If there were any aliases, link them now.
+ for (Module::alias_iterator I = SrcM->alias_begin(),
+ E = SrcM->alias_end(); I != E; ++I)
+ if (linkAliasProto(I))
+ return true;
+
+ for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+ linkAppendingVarInit(AppendingVars[i]);
+
+ // Update the initializers in the DstM module now that all globals that may
+ // be referenced are in DstM.
+ linkGlobalInits();
+
+ // Link in the function bodies that are defined in the source module into
+ // DstM.
+ for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+ if (SF->isDeclaration()) continue; // No body if function is external.
+
+ linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ }
+
+ // Resolve all uses of aliases with aliasees.
+ linkAliasBodies();
+
+ // Now that all of the types from the source are used, resolve any structs
+ // copied over to the dest that didn't exist there.
+ TypeMap.linkDefinedTypeBodies();
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// LinkModules entrypoint.
+//===----------------------------------------------------------------------===//
+
+// LinkModules - This function links two modules together, with the resulting
+// Dest module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem. Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Dest, Src);
+ if (TheLinker.run()) {
+ if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp
new file mode 100644
index 000000000000..fba91da5ddd1
--- /dev/null
+++ b/contrib/llvm/lib/Linker/Linker.cpp
@@ -0,0 +1,174 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+Linker::Linker(StringRef progname, StringRef modname,
+ LLVMContext& C, unsigned flags):
+ Context(C),
+ Composite(new Module(modname, C)),
+ LibPaths(),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
+ Context(aModule->getContext()),
+ Composite(aModule),
+ LibPaths(),
+ Flags(flags),
+ Error(),
+ ProgramName(progname) { }
+
+Linker::~Linker() {
+ delete Composite;
+}
+
+bool
+Linker::error(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietErrors))
+ errs() << ProgramName << ": error: " << message << "\n";
+ return true;
+}
+
+bool
+Linker::warning(StringRef message) {
+ Error = message;
+ if (!(Flags&QuietWarnings))
+ errs() << ProgramName << ": warning: " << message << "\n";
+ return false;
+}
+
+void
+Linker::verbose(StringRef message) {
+ if (Flags&Verbose)
+ errs() << " " << message << "\n";
+}
+
+void
+Linker::addPath(const sys::Path& path) {
+ LibPaths.push_back(path);
+}
+
+void
+Linker::addPaths(const std::vector<std::string>& paths) {
+ for (unsigned i = 0, e = paths.size(); i != e; ++i)
+ LibPaths.push_back(sys::Path(paths[i]));
+}
+
+void
+Linker::addSystemPaths() {
+ sys::Path::GetBitcodeLibraryPaths(LibPaths);
+ LibPaths.insert(LibPaths.begin(),sys::Path("./"));
+}
+
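+// releaseModule - Hand ownership of the composite module back to the caller
+// and reset the linker's state.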
+Module*
+Linker::releaseModule() {
+ Module* result = Composite;
+ LibPaths.clear();
+ Error.clear();
+ Composite = 0;
+ Flags = 0;
+ return result;
+}
+
+// LoadObject - Read in and parse the bitcode file named by FN and return the
+// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
+// Error if an error occurs.
+std::auto_ptr<Module>
+Linker::LoadObject(const sys::Path &FN) {
+ std::string ParseErrorMessage;
+ Module *Result = 0;
+
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
+ ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
+ + ec.message();
+ else
+ Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
+ if (Result)
+ return std::auto_ptr<Module>(Result);
+ Error = "Bitcode file '" + FN.str() + "' could not be loaded";
+ if (ParseErrorMessage.size())
+ Error += ": " + ParseErrorMessage;
+ return std::auto_ptr<Module>();
+}
+
+// IsLibrary - Determine if "Name" is a library in "Directory". Return
+// a non-empty sys::Path if it is found, an empty one otherwise.
+static inline sys::Path IsLibrary(StringRef Name,
+ const sys::Path &Directory) {
+
+ sys::Path FullPath(Directory);
+
+ // Try the libX.a form
+ FullPath.appendComponent(("lib" + Name).str());
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.bca form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("bca");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.so (or .dylib) form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix(sys::Path::GetDLLSuffix());
+ if (FullPath.isDynamicLibrary()) // Native shared library?
+ return FullPath;
+ if (FullPath.isBitcodeFile()) // .so file containing bitcode?
+ return FullPath;
+
+ // Not found .. fall through
+
+ // Indicate that the library was not found in the directory.
+ FullPath.clear();
+ return FullPath;
+}
+
+/// FindLib - Try to convert Filename into the name of a file that we can open,
+/// if it does not already name a file we can open, by first trying to open
+/// Filename, then libFilename.[suffix] for each of a set of several common
+/// library suffixes, in each of the directories in LibPaths. Returns an empty
+/// Path if no matching file can be found.
+///
+sys::Path
+Linker::FindLib(StringRef Filename) {
+ // Determine if the pathname can be found as it stands.
+ sys::Path FilePath(Filename);
+ if (FilePath.canRead() &&
+ (FilePath.isArchive() || FilePath.isDynamicLibrary()))
+ return FilePath;
+
+ // Iterate over the directories in Paths to see if we can find the library
+ // there.
+ for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
+ sys::Path Directory(LibPaths[Index]);
+ sys::Path FullPath = IsLibrary(Filename, Directory);
+ if (!FullPath.isEmpty())
+ return FullPath;
+ }
+ return sys::Path();
+}
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
new file mode 100644
index 000000000000..59e1b8eb8105
--- /dev/null
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,1730 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ELF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFObjectWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+#include "../Target/ARM/ARMFixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "reloc-info"
+
+bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI =
+ Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+ switch (Variant) {
+ default:
+ return false;
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_PLT:
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_GOTOFF:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_TLSLD:
+ return true;
+ }
+}
+
+ELFObjectWriter::~ELFObjectWriter()
+{}
+
+// Emit the ELF header.
+void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // The WriteWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ // e_ident[EI_OSABI]
+ switch (TargetObjectWriter->getOSType()) {
+ case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break;
+ case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break;
+ default: Write8(ELF::ELFOSABI_NONE); break;
+ }
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ Write16(TargetObjectWriter->getEMachine()); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // e_flags = whatever the target wants
+ WriteEFlags();
+
+ // e_ehsize = ELF header size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+
+ // e_shnum = # of section header ents
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(0);
+ else
+ Write16(NumberOfSections);
+
+ // e_shstrndx = Section # of '.shstrtab'
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(ELF::SHN_XINDEX);
+ else
+ Write16(ShstrtabIndex);
+}
+
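+// WriteSymbolEntry - Emit a single symbol table entry in Elf32_Sym or
+// Elf64_Sym layout, routing section indices >= SHN_LORESERVE through the
+// .symtab_shndx fragment when one is provided.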
+void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint32_t shndx,
+ bool Reserved) {
+ if (ShndxF) {
+ if (shndx >= ELF::SHN_LORESERVE && !Reserved)
+ String32(*ShndxF, shndx);
+ else
+ String32(*ShndxF, 0);
+ }
+
+ uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ?
+ uint16_t(ELF::SHN_XINDEX) : shndx;
+
+ if (is64Bit()) {
+ String32(*SymtabF, name); // st_name
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ String64(*SymtabF, value); // st_value
+ String64(*SymtabF, size); // st_size
+ } else {
+ String32(*SymtabF, name); // st_name
+ String32(*SymtabF, value); // st_value
+ String32(*SymtabF, size); // st_size
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ }
+}
+
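+// SymbolValue - Compute the st_value to emit for a symbol: common symbols use
+// their alignment, absolute variables their evaluated constant, and symbols
+// with a fragment their offset within the section (plus one for Thumb
+// functions); everything else gets zero.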
+uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
+ const MCAsmLayout &Layout) {
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonAlignment();
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.isAbsolute() && Symbol.isVariable()) {
+ if (const MCExpr *Value = Symbol.getVariableValue()) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, Layout))
+ return (uint64_t)IntValue;
+ }
+ }
+
+ if (!Symbol.isInSection())
+ return 0;
+
+
+ if (Data.getFragment()) {
+ if (Data.getFlags() & ELF_Other_ThumbFunc)
+ return Layout.getSymbolOffset(&Data)+1;
+ else
+ return Layout.getSymbolOffset(&Data);
+ }
+
+ return 0;
+}
+
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The presence of symbol versions causes undefined symbols and
+ // versions declared with @@@ to be renamed.
+
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Alias = it->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ // Not an alias.
+ if (&Symbol == &Alias)
+ continue;
+
+ StringRef AliasName = Alias.getName();
+ size_t Pos = AliasName.find('@');
+ if (Pos == StringRef::npos)
+ continue;
+
+ // Aliases defined with .symver copy the binding from the symbol they alias.
+ // This is the first place we are able to copy this information.
+ it->setExternal(SD.isExternal());
+ MCELF::SetBinding(*it, MCELF::GetBinding(SD));
+
+ StringRef Rest = AliasName.substr(Pos);
+ if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
+ continue;
+
+ // FIXME: produce a better error message.
+ if (Symbol.isUndefined() && Rest.startswith("@@") &&
+ !Rest.startswith("@@@"))
+ report_fatal_error("A @@ version cannot be undefined");
+
+ Renames.insert(std::make_pair(&Symbol, &Alias));
+ }
+}
+
+void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &OrigData = *MSD.SymbolData;
+ MCSymbolData &Data =
+ Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol());
+
+ bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
+ Data.getSymbol().isVariable();
+
+ uint8_t Binding = MCELF::GetBinding(OrigData);
+ uint8_t Visibility = MCELF::GetVisibility(OrigData);
+ uint8_t Type = MCELF::GetType(Data);
+
+ uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+ uint8_t Other = Visibility;
+
+ uint64_t Value = SymbolValue(Data, Layout);
+ uint64_t Size = 0;
+
+ assert(!(Data.isCommon() && !Data.isExternal()));
+
+ const MCExpr *ESize = Data.getSize();
+ if (ESize) {
+ int64_t Res;
+ if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ report_fatal_error("Size expression must be absolute.");
+ Size = Res;
+ }
+
+ // Write out the symbol table entry
+ WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex, IsReserved);
+}
+
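+// WriteSymbolTable - Emit the symbol table: the null entry first, then local
+// symbols, one STT_SECTION entry per regular section, and finally the
+// external and undefined symbols.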
+void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap) {
+ // The string table must be emitted first because we need the index
+ // into the string table for all the symbol names.
+ assert(StringTable.size() && "Missing string table");
+
+ // FIXME: Make sure the start of the symbol table is aligned.
+
+ // The first entry is the undefined symbol entry.
+ WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
+
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = LocalSymbolData[i];
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ }
+
+ // Write out a symbol table entry for each regular section.
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+ ++i) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(i->getSection());
+ if (Section.getType() == ELF::SHT_RELA ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_STRTAB ||
+ Section.getType() == ELF::SHT_SYMTAB)
+ continue;
+ WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert(((Data.getFlags() & ELF_STB_Global) ||
+ (Data.getFlags() & ELF_STB_Weak)) &&
+ "External symbol requires STB_GLOBAL or STB_WEAK flag");
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+ LastLocalSymbolIndex++;
+ }
+}
+
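+// SymbolToReloc - Pick the symbol a relocation should refer to: either the
+// (possibly renamed) symbol itself, or defer to the target-specific
+// ExplicitRelSym hook, which may choose a section-relative relocation instead.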
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ const MCSymbol *Renamed = Renames.lookup(&Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ if (ASymbol.isUndefined()) {
+ if (Renamed)
+ return Renamed;
+ return &ASymbol;
+ }
+
+ if (SD.isExternal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(ASymbol.getSection());
+ const SectionKind secKind = Section.getKind();
+
+ if (secKind.isBSS())
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+ if (secKind.isThreadLocal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ const MCSectionELF &Sec2 =
+ static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+ if (&Sec2 != &Section &&
+ (Kind == MCSymbolRefExpr::VK_PLT ||
+ Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+ Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ if (Section.getFlags() & ELF::SHF_MERGE) {
+ if (Target.getConstant() == 0)
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ return ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+
+}
+
+
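+// RecordRelocation - Translate a fixup into an ELFRelocationEntry: work out
+// the symbol (or section) to relocate against, fold any A-B difference into
+// the value, and queue the entry on the fragment's parent section.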
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ int Index = 0;
+ int64_t Value = Target.getConstant();
+ const MCSymbol *RelocSymbol = NULL;
+
+ bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ if (!Target.isAbsolute()) {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ RelocSymbol = SymbolToReloc(Asm, Target, *Fragment, Fixup, IsPCRel);
+
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ const MCSymbol &SymbolB = RefB->getSymbol();
+ MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+ IsPCRel = true;
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&SDB);
+
+ // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ Value += b - a;
+ }
+
+ if (!RelocSymbol) {
+ MCSymbolData &SD = Asm.getSymbolData(ASymbol);
+ MCFragment *F = SD.getFragment();
+
+ Index = F->getParent()->getOrdinal() + 1;
+
+ // Offset of the symbol in the section
+ Value += Layout.getSymbolOffset(&SD);
+ } else {
+ if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
+ WeakrefUsedInReloc.insert(RelocSymbol);
+ else
+ UsedInReloc.insert(RelocSymbol);
+ Index = -1;
+ }
+ Addend = Value;
+ // 64-bit targets carry the addend in the relocation entry, so zero the
+ // in-place value; 32-bit targets (e.g. i386) keep the addend in place.
+ if (is64Bit())
+ Value = 0;
+ }
+
+ FixedValue = Value;
+ unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
+ (RelocSymbol != 0), Addend);
+
+ uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
+ Fixup.getOffset();
+
+ if (!hasRelocationAddend())
+ Addend = 0;
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+
+uint64_t
+ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+ return SD.getIndex();
+}
+
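+// isInSymtab - Decide whether a symbol should be emitted into the symbol
+// table, based on how it is used, whether it was renamed, and its binding
+// and visibility.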
+bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
+ const MCSymbolData &Data,
+ bool Used, bool Renamed) {
+ if (Data.getFlags() & ELF_Other_Weakref)
+ return false;
+
+ if (Used)
+ return true;
+
+ if (Renamed)
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
+ return true;
+
+ const MCSymbol &A = Symbol.AliasedSymbol();
+ if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
+ return false;
+
+ bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
+ if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
+ return false;
+
+ if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
+ return false;
+
+ if (Symbol.isTemporary())
+ return false;
+
+ return true;
+}
+
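+// isLocal - Decide whether a symbol should be emitted with local binding.
+// External symbols are never local; undefined references are local only when
+// they are group signatures not otherwise used in a relocation.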
+bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc) {
+ if (Data.isExternal())
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
+ if (isSignature && !isUsedInReloc)
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
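+// ComputeIndexMap - Assign section header indices: SHT_GROUP sections come
+// first, followed by the remaining sections, each immediately followed by its
+// relocation section (if any).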
+void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ const MCSectionELF *RelSection = RelMap.lookup(&Section);
+ if (RelSection)
+ SectionIndexMap[RelSection] = Index++;
+ }
+}
+
+void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections) {
+ // FIXME: Is this the correct place to do this?
+ // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
+ if (NeedsGOT) {
+ llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+ MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
+ Data.setExternal(true);
+ MCELF::SetBinding(Data, ELF::STB_GLOBAL);
+ }
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // FIXME: We could optimize suffixes in strtab in the same way we
+ // optimize them in shstrtab.
+
+ // Add the data for the symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ bool Used = UsedInReloc.count(&Symbol);
+ bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
+ bool isSignature = RevGroupMap.count(&Symbol);
+
+ if (!isInSymtab(Asm, *it,
+ Used || WeakrefUsed || isSignature,
+ Renames.count(&Symbol)))
+ continue;
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ // Undefined symbols are global, but this is the first place we are able
+ // to set that binding.
+ bool Local = isLocal(*it, isSignature, Used);
+ if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) {
+ MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
+ MCELF::SetBinding(*it, ELF::STB_GLOBAL);
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ }
+
+ if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
+ MCELF::SetBinding(*it, ELF::STB_WEAK);
+
+ if (it->isCommon()) {
+ assert(!Local);
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ } else if (RefSymbol.isUndefined()) {
+ if (isSignature && !Used)
+ MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+ else
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ } else {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(RefSymbol.getSection());
+ MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
+ assert(MSD.SectionIndex && "Invalid section index!");
+ }
+
+ // The @@@ in symbol version is replaced with @ in undefined symbols and
+ // @@ in defined ones.
+ StringRef Name = Symbol.getName();
+ SmallString<32> Buf;
+
+ size_t Pos = Name.find("@@@");
+ if (Pos != StringRef::npos) {
+ Buf += Name.substr(0, Pos);
+ unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+ Buf += Name.substr(Pos + Skip);
+ Name = Buf;
+ }
+
+ uint64_t &Entry = StringIndexMap[Name];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Name;
+ StringTable += '\x00';
+ }
+ MSD.StringIndex = Entry;
+ if (MSD.SectionIndex == ELF::SHN_UNDEF)
+ UndefinedSymbolData.push_back(MSD);
+ else if (Local)
+ LocalSymbolData.push_back(MSD);
+ else
+ ExternalSymbolData.push_back(MSD);
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ unsigned Index = 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+
+ Index += NumRegularSections;
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+}
+
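+// CreateRelocationSections - Create a .rel or .rela section for every section
+// that has relocations recorded against it, and remember the pairing in
+// RelMap.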
+void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ if (Relocations[&SD].empty())
+ continue;
+
+ MCContext &Ctx = Asm.getContext();
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (hasRelocationAddend())
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ const MCSectionELF *RelaSection =
+ Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ RelMap[&Section] = RelaSection;
+ Asm.getOrCreateSectionData(*RelaSection);
+ }
+}
+
+void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const MCSectionELF *RelaSection = RelMap.lookup(&Section);
+ if (!RelaSection)
+ continue;
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+ WriteRelocationsFragment(Asm, F, &*it);
+ }
+}
+
+void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+ // sort by the r_offset just like gnu as does
+ array_pod_sort(Relocs.begin(), Relocs.end());
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ if (entry.Index < 0)
+ entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
+ else if (entry.Index)
+ entry.Index += LocalSymbolData.size();
+ if (is64Bit()) {
+ String64(*F, entry.r_offset);
+
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(entry.Index, entry.Type);
+ String64(*F, ERE64.r_info);
+
+ if (hasRelocationAddend())
+ String64(*F, entry.r_addend);
+ } else {
+ String32(*F, entry.r_offset);
+
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(entry.Index, entry.Type);
+ String32(*F, ERE32.r_info);
+
+ if (hasRelocationAddend())
+ String32(*F, entry.r_addend);
+ }
+ }
+}
+
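+// compareBySuffix - Order sections so that names sharing a suffix end up next
+// to each other, letting .shstrtab reuse the tail of a previous entry (e.g.
+// ".text" reuses the tail of ".rel.text").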
+static int compareBySuffix(const void *a, const void *b) {
+ const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
+ const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
+ const StringRef &NameA = secA->getSectionName();
+ const StringRef &NameB = secB->getSectionName();
+ const unsigned sizeA = NameA.size();
+ const unsigned sizeB = NameB.size();
+ const unsigned len = std::min(sizeA, sizeB);
+ for (unsigned int i = 0; i < len; ++i) {
+ char ca = NameA[sizeA - i - 1];
+ char cb = NameB[sizeB - i - 1];
+ if (ca != cb)
+ return cb - ca;
+ }
+
+ return sizeB - sizeA;
+}
+
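+// CreateMetadataSections - Create the .shstrtab, .symtab (plus .symtab_shndx
+// when needed) and .strtab sections, fill in their contents, and record their
+// section indices.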
+void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
+ const MCSectionELF *ShstrtabSection =
+ Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ const MCSectionELF *SymtabSection =
+ Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+ SymtabSD.setAlignment(is64Bit() ? 8 : 4);
+
+ MCSectionData *SymtabShndxSD = NULL;
+
+ if (NeedsSymtabShndx) {
+ const MCSectionELF *SymtabShndxSection =
+ Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0,
+ SectionKind::getReadOnly(), 4, "");
+ SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection);
+ SymtabShndxSD->setAlignment(4);
+ }
+
+ const MCSectionELF *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection);
+ SymbolTableIndex = SectionIndexMap.lookup(SymtabSection);
+ StringTableIndex = SectionIndexMap.lookup(StrtabSection);
+
+ // Symbol table
+ F = new MCDataFragment(&SymtabSD);
+ MCDataFragment *ShndxF = NULL;
+ if (NeedsSymtabShndx) {
+ ShndxF = new MCDataFragment(SymtabShndxSD);
+ }
+ WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap);
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ std::vector<const MCSectionELF*> Sections;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ Sections.push_back(&Section);
+ }
+ array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
+ uint64_t Index = 1;
+ F->getContents() += '\x00';
+
+ for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
+ const MCSectionELF &Section = *Sections[I];
+
+ StringRef Name = Section.getSectionName();
+ if (I != 0) {
+ StringRef PreviousName = Sections[I - 1]->getSectionName();
+ if (PreviousName.endswith(Name)) {
+ SectionStringTableIndex[&Section] = Index - Name.size() - 1;
+ continue;
+ }
+ }
+ // Remember the index into the string table so we can write it
+ // into the sh_name field of the section header table.
+ SectionStringTableIndex[&Section] = Index;
+
+ Index += Name.size() + 1;
+ F->getContents() += Name;
+ F->getContents() += '\x00';
+ }
+}
+
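+// CreateIndexedSections - Create the .note.GNU-stack section if requested and
+// build the SHT_GROUP sections: one group per signature symbol, each listing
+// the indices of its member sections.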
+void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap) {
+ // Create the .note.GNU-stack section if needed.
+ MCContext &Ctx = Asm.getContext();
+ if (Asm.getNoExecStack()) {
+ const MCSectionELF *GnuStackSection =
+ Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ Asm.getOrCreateSectionData(*GnuStackSection);
+ }
+
+ // Build the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+
+ const MCSymbol *SignatureSymbol = Section.getGroup();
+ Asm.getOrCreateSymbolData(*SignatureSymbol);
+ const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
+ if (!Group) {
+ Group = Ctx.CreateELFGroupSection();
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ Data.setAlignment(4);
+ MCDataFragment *F = new MCDataFragment(&Data);
+ String32(*F, ELF::GRP_COMDAT);
+ }
+ GroupMap[Group] = SignatureSymbol;
+ }
+
+ ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+ // Add sections to the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+ const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ // FIXME: we could use the previous fragment
+ MCDataFragment *F = new MCDataFragment(&Data);
+ unsigned Index = SectionIndexMap.lookup(&Section);
+ String32(*F, Index);
+ }
+}
+
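+// WriteSection - Emit one section header entry, computing sh_link and sh_info
+// from the section type.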
+void ELFObjectWriter::WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size,
+ uint64_t Alignment,
+ const MCSectionELF &Section) {
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&Section];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSectionELF *SymtabSection;
+ const MCSectionELF *InfoSection;
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
+ 0,
+ SectionKind::getReadOnly());
+ sh_link = SectionIndexMap.lookup(SymtabSection);
+ assert(sh_link && ".symtab not found");
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ sh_info = SectionIndexMap.lookup(InfoSection);
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_SYMTAB_SHNDX:
+ sh_link = SymbolTableIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NOTE:
+ case ELF::SHT_NULL:
+ case ELF::SHT_ARM_ATTRIBUTES:
+ case ELF::SHT_INIT_ARRAY:
+ case ELF::SHT_FINI_ARRAY:
+ case ELF::SHT_PREINIT_ARRAY:
+ case ELF::SHT_X86_64_UNWIND:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_GROUP: {
+ sh_link = SymbolTableIndex;
+ sh_info = GroupSymbolIndex;
+ break;
+ }
+
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
+
+ WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+ Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
+ Alignment, Section.getEntrySize());
+}
+
+bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) {
+ return SD.getOrdinal() == ~UINT32_C(0) &&
+ !SD.getSection().isVirtualSection();
+}
+
+uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
+ uint64_t Ret = 0;
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ Ret += cast<MCDataFragment>(F).getContents().size();
+ }
+ return Ret;
+}
+
+uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionFileSize(&SD);
+}
+
+uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionAddressSize(&SD);
+}
+
+void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section) {
+ uint64_t FileOff = OS.tell();
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+ WriteZeros(Padding);
+ FileOff += Padding;
+
+ FileOff += GetSectionFileSize(Layout, SD);
+
+ if (IsELFMetaDataSection(SD)) {
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ WriteBytes(cast<MCDataFragment>(F).getContents().str());
+ }
+ } else {
+ Asm.WriteSectionData(&SD, Layout);
+ }
+}
+
+void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm,
+ const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap) {
+ const unsigned NumSections = Asm.size() + 1;
+
+ std::vector<const MCSectionELF*> Sections;
+ Sections.resize(NumSections - 1);
+
+ for (SectionIndexMapTy::const_iterator i=
+ SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+ const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+ Sections[p.second - 1] = p.first;
+ }
+
+ // Null section first.
+ uint64_t FirstSectionSize =
+ NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+ uint32_t FirstSectionLink =
+ ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+ WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+ for (unsigned i = 0; i < NumSections - 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ uint32_t GroupSymbolIndex;
+ if (Section.getType() != ELF::SHT_GROUP)
+ GroupSymbolIndex = 0;
+ else
+ GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm,
+ GroupMap.lookup(&Section));
+
+ uint64_t Size = GetSectionAddressSize(Layout, SD);
+
+ WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+ SectionOffsetMap.lookup(&Section), Size,
+ SD.getAlignment(), Section);
+ }
+}
+
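+// ComputeSectionOrder - Build the order used for assigning file offsets and
+// writing section data: group sections first, then regular sections, then
+// relocation sections.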
+void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections) {
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP &&
+ Section.getType() != ELF::SHT_REL &&
+ Section.getType() != ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_RELA)
+ Sections.push_back(&Section);
+ }
+}
+
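+// WriteObject - Drive the emission of the object file: create relocation,
+// group and metadata sections, compute the symbol table, lay out file
+// offsets, and then write the ELF header, section contents and section
+// header table.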
+void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ GroupMapTy GroupMap;
+ RevGroupMapTy RevGroupMap;
+ SectionIndexMapTy SectionIndexMap;
+
+ unsigned NumUserSections = Asm.size();
+
+ DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
+ CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ const unsigned NumUserAndRelocSections = Asm.size();
+ CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+ RevGroupMap, SectionIndexMap, RelMap);
+ const unsigned AllSections = Asm.size();
+ const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
+
+ unsigned NumRegularSections = NumUserSections + NumIndexedSections;
+
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections);
+
+
+ WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+ CreateMetadataSections(const_cast<MCAssembler&>(Asm),
+ const_cast<MCAsmLayout&>(Layout),
+ SectionIndexMap,
+ RelMap);
+
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
+ uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr);
+ uint64_t FileOff = HeaderSize;
+
+ std::vector<const MCSectionELF*> Sections;
+ ComputeSectionOrder(Asm, Sections);
+ unsigned NumSections = Sections.size();
+ SectionOffsetMapTy SectionOffsetMap;
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
+
+ const unsigned SectionHeaderOffset = FileOff - HeaderSize;
+
+ uint64_t SectionHeaderEntrySize = is64Bit() ?
+ sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
+ FileOff += (NumSections + 1) * SectionHeaderEntrySize;
+
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
+
+ // Get the size of the section in the output file (including padding).
+ FileOff += GetSectionFileSize(Layout, SD);
+ }
+
+ // Write out the ELF header ...
+ WriteHeader(SectionHeaderOffset, NumSections + 1);
+
+ // ... then the regular sections ...
+ // The extra "+ 1" is for .shstrtab.
+ for (unsigned i = 0; i < NumRegularSections + 1; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+
+ FileOff = OS.tell();
+ uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+ WriteZeros(Padding);
+
+ // ... then the section header table ...
+ WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
+ SectionOffsetMap);
+
+ FileOff = OS.tell();
+
+ // ... and then the remaining sections ...
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
+ WriteDataSectionData(Asm, Layout, *Sections[i]);
+}
+
+bool
+ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (DataA.getFlags() & ELF_STB_Weak)
+ return false;
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ Asm, DataA, FB,InSet, IsPCRel);
+}
+
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ switch (MOTW->getEMachine()) {
+ case ELF::EM_386:
+ case ELF::EM_X86_64:
+ return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_ARM:
+ return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_MBLAZE:
+ return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ default: llvm_unreachable("Unsupported architecture"); break;
+ }
+}
+
+
+/// START OF SUBCLASSES for ELFObjectWriter
+//===- ARMELFObjectWriter -------------------------------------------===//
+
+ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+ARMELFObjectWriter::~ARMELFObjectWriter()
+{}
+
+// FIXME: get the real EABI Version from the Triple.
+void ARMELFObjectWriter::WriteEFlags() {
+ Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
+}
+
+// On ARM, _MergedGlobals and most other symbols get emitted directly,
+// i.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) {
+ ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: assert(0 && "Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ assert(0 && "unimplemented");
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ default:
+ Type = ELF::R_ARM_NONE;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier"); break;
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ assert(0 && "Unimplemented");
+ break;
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+//===- MBlazeELFObjectWriter -------------------------------------------===//
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
+}
+
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
+}
+
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend !=0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
+}
+
+//===- X86ELFObjectWriter -------------------------------------------===//
+
+
+X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (is64Bit()) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+ case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+ case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ case FK_PCRel_1:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC8;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ assert(isInt<32>(Target.getConstant()));
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32-bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_386_TLS_IE_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
+}
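+
+// A few illustrative mappings taken from the switches above: on x86-64 a
+// 4-byte PC-relative fixup with a VK_PLT modifier becomes R_X86_64_PLT32 and
+// a plain FK_Data_8 becomes R_X86_64_64; in 32-bit mode an FK_Data_4 fixup
+// with VK_GOTOFF becomes R_386_GOTOFF and a PC-relative VK_None reference
+// becomes R_386_PC32.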
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.h b/contrib/llvm/lib/MC/ELFObjectWriter.h
new file mode 100644
index 000000000000..7593099fb447
--- /dev/null
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.h
@@ -0,0 +1,414 @@
+//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ELF object file writer and its target-specific
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_ELFOBJECTWRITER_H
+#define LLVM_MC_ELFOBJECTWRITER_H
+
+#include "MCELF.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+
+class MCSection;
+class MCDataFragment;
+class MCSectionELF;
+
+class ELFObjectWriter : public MCObjectWriter {
+ protected:
+
+ static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+ static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed);
+ static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc);
+ static bool IsELFMetaDataSection(const MCSectionData &SD);
+ static uint64_t DataSectionSize(const MCSectionData &SD);
+ static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+ static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section);
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+ /// ELFSymbolData - Helper struct holding some precomputed information
+ /// about a symbol.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// @name Relocation Data
+ /// @{
+
+ struct ELFRelocationEntry {
+ // Make these fields big enough for both 32-bit and 64-bit ELF.
+ uint64_t r_offset;
+ int Index;
+ unsigned Type;
+ const MCSymbol *Symbol;
+ uint64_t r_addend;
+
+ ELFRelocationEntry()
+ : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+ ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+ unsigned RelType, const MCSymbol *Sym,
+ uint64_t Addend)
+ : r_offset(RelocOffset), Index(Idx), Type(RelType),
+ Symbol(Sym), r_addend(Addend) {}
+
+ // Sort entries in descending order of r_offset.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+ };
+
+ /// The target specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ bool NeedsGOT;
+
+ bool NeedsSymtabShndx;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
+
+ unsigned ShstrtabIndex;
+
+
+ virtual const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ // For arch-specific emission of explicit reloc symbol
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return NULL;
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+
+ public:
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+ NeedsGOT(false), NeedsSymtabShndx(false) {
+ }
+
+ virtual ~ELFObjectWriter();
+
+ void WriteWord(uint64_t W) {
+ if (is64Bit())
+ Write64(W);
+ else
+ Write32(W);
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents() += StringRef(buf, 1);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ F.getContents() += StringRef(buf, 2);
+ }
+
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ F.getContents() += StringRef(buf, 4);
+ }
+
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ F.getContents() += StringRef(buf, 8);
+ }
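+
+ // For example, on a little-endian writer String32(F, 1) appends the bytes
+ // 01 00 00 00 to the fragment, while on a big-endian writer it appends
+ // 00 00 00 01.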
+
+ virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ /// Default e_flags = 0
+ virtual void WriteEFlags() { Write32(0); }
+
+ virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
+
+ virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
+
+ virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+ // Map from a section to the section with the relocations
+ typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+ // Map from a section to its offset
+ typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+ /// ComputeSymbolTable - Compute the symbol table data.
+ ///
+ /// Fills in the StringTable member and the LocalSymbolData,
+ /// ExternalSymbolData and UndefinedSymbolData lists for the given
+ /// assembler and section index map.
+ virtual void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections);
+
+ virtual void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ RelMapTy &RelMap);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap);
+
+ virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap);
+
+ void ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections);
+
+ virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ virtual void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual bool
+ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ virtual void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) = 0;
+ };
+
+ //===- X86ELFObjectWriter -------------------------------------------===//
+
+ class X86ELFObjectWriter : public ELFObjectWriter {
+ public:
+ X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
+
+
+ //===- ARMELFObjectWriter -------------------------------------------===//
+
+ class ARMELFObjectWriter : public ELFObjectWriter {
+ public:
+ // FIXME: MCAssembler can't yet return the Subtarget.
+ enum { DefaultEABIVersion = 0x05000000U };
+
+ ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual void WriteEFlags();
+ protected:
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ private:
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const;
+
+ };
+
+ //===- MBlazeELFObjectWriter -------------------------------------------===//
+
+ class MBlazeELFObjectWriter : public ELFObjectWriter {
+ public:
+ MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
new file mode 100644
index 000000000000..502b60b0edf4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -0,0 +1,137 @@
+//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Dwarf.h"
+#include <cctype>
+#include <cstring>
+using namespace llvm;
+
+MCAsmInfo::MCAsmInfo() {
+ PointerSize = 4;
+ IsLittleEndian = true;
+ StackGrowsUp = false;
+ HasSubsectionsViaSymbols = false;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorString = ";";
+ CommentColumn = 40;
+ CommentString = "#";
+ LabelSuffix = ":";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LinkerPrivateGlobalPrefix = "";
+ InlineAsmStart = "APP";
+ InlineAsmEnd = "NO_APP";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = true;
+ ZeroDirective = "\t.zero\t";
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ SunStyleELFSectionSwitchSyntax = false;
+ UsesELFSectionDirectiveForBSS = false;
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ GPRel32Directive = 0;
+ GlobalDirective = "\t.globl\t";
+ HasSetDirective = true;
+ HasAggressiveSymbolFolding = true;
+ HasLCOMMDirective = false;
+ COMMDirectiveAlignmentIsInBytes = true;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ HasNoDeadStrip = false;
+ HasSymbolResolver = false;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ LinkOnceDirective = 0;
+ HiddenVisibilityAttr = MCSA_Hidden;
+ HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+ ProtectedVisibilityAttr = MCSA_Protected;
+ HasLEB128 = false;
+ SupportsDebugInformation = false;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfRequiresRelocationForSectionOffset = true;
+ DwarfSectionOffsetDirective = 0;
+ DwarfUsesLabelOffsetForRanges = true;
+ DwarfRegNumForCFI = false;
+ HasMicrosoftFastStdCallMangling = false;
+
+ AsmTransCBE = 0;
+}
+
+MCAsmInfo::~MCAsmInfo() {
+}
+
+
+unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
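+
+// For example, getULEB128Size(0) is 1 and getULEB128Size(624485) is 3, since
+// 624485 (0x98765) encodes as the ULEB128 bytes 0xE5 0x8E 0x26.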
+
+unsigned MCAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
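+
+// For example, getSLEB128Size(-2) is 1 (the single byte 0x7E), while
+// getSLEB128Size(127) is 2 (0xFF 0x00), because bit 6 of the final 7-bit
+// group must match the sign of the value.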
+
+const MCExpr *
+MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ return getExprForFDESymbol(Sym, Encoding, Streamer);
+}
+
+const MCExpr *
+MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ if (!(Encoding & dwarf::DW_EH_PE_pcrel))
+ return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
+
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
+ MCSymbol *PCSym = Context.CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
+ return MCBinaryExpr::CreateSub(Res, PC, Context);
+}
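+
+// In the pcrel case above the returned expression is effectively
+// "Sym - <temporary label emitted at the current position>", i.e. the FDE
+// field is encoded relative to the point where it is emitted.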
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
new file mode 100644
index 000000000000..7fc7d7abb232
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -0,0 +1,37 @@
+//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on COFF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+MCAsmInfoCOFF::MCAsmInfoCOFF() {
+ GlobalPrefix = "_";
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasLCOMMDirective = true;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ LinkOnceDirective = "\t.linkonce discard\n";
+
+ // Doesn't support visibility:
+ HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid;
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ HasMicrosoftFastStdCallMangling = true;
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
new file mode 100644
index 000000000000..5851cb0391d8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -0,0 +1,61 @@
+//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+MCAsmInfoDarwin::MCAsmInfoDarwin() {
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LinkerPrivateGlobalPrefix = "l";
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+ HasSubsectionsViaSymbols = true;
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ InlineAsmStart = " InlineAsm Start";
+ InlineAsmEnd = " InlineAsm End";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ HasMachoZeroFillDirective = true; // Uses .zerofill
+ HasMachoTBSSDirective = true; // Uses .tbss
+ HasStaticCtorDtorReferenceInStaticMode = true;
+
+ // FIXME: Darwin 10 and newer don't need this.
+ LinkerRequiresNonEmptyDwarfLines = true;
+
+ // FIXME: Change this once MC is the system assembler.
+ HasAggressiveSymbolFolding = false;
+
+ HiddenVisibilityAttr = MCSA_PrivateExtern;
+ HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ // Doesn't support protected visibility.
+ ProtectedVisibilityAttr = MCSA_Global;
+
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = true;
+
+ DwarfRequiresRelocationForSectionOffset = false;
+ DwarfUsesLabelOffsetForRanges = false;
+}
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
new file mode 100644
index 000000000000..d5d08e8f69fb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -0,0 +1,1256 @@
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+class MCAsmStreamer : public MCStreamer {
+protected:
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+private:
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+ OwningPtr<TargetAsmBackend> AsmBackend;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+ unsigned UseLoc : 1;
+ unsigned UseCFI : 1;
+
+ enum EHSymbolFlags { EHGlobal = 1,
+ EHWeakDefinition = 1 << 1,
+ EHPrivateExtern = 1 << 2 };
+ DenseMap<const MCSymbol*, unsigned> FlagMap;
+
+ bool needsSet(const MCExpr *Value);
+
+ void EmitRegisterName(int64_t Register);
+
+public:
+ MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ MCInstPrinter *printer, MCCodeEmitter *emitter,
+ TargetAsmBackend *asmbackend,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+ CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+ ~MCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+
+ virtual void InitSections() {
+ // FIXME: This is Mach-O-specific, but the testsuite
+ // expects this.
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+ virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace = 0);
+
+ virtual void EmitULEB128Value(const MCExpr *Value);
+
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName);
+
+ virtual void EmitCFISections(bool EH, bool Debug);
+ virtual void EmitCFIStartProc();
+ virtual void EmitCFIEndProc();
+ virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
+ virtual void EmitCFIDefCfaOffset(int64_t Offset);
+ virtual void EmitCFIDefCfaRegister(int64_t Register);
+ virtual void EmitCFIOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+ virtual void EmitCFIRememberState();
+ virtual void EmitCFIRestoreState();
+ virtual void EmitCFISameValue(int64_t Register);
+ virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
+ virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+
+ virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
+ virtual void EmitWin64EHEndProc();
+ virtual void EmitWin64EHStartChained();
+ virtual void EmitWin64EHEndChained();
+ virtual void EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except);
+ virtual void EmitWin64EHHandlerData();
+ virtual void EmitWin64EHPushReg(unsigned Register);
+ virtual void EmitWin64EHSetFrame(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHAllocStack(unsigned Size);
+ virtual void EmitWin64EHSaveReg(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHSaveXMM(unsigned Register, unsigned Offset);
+ virtual void EmitWin64EHPushFrame(bool Code);
+ virtual void EmitWin64EHEndProlog();
+
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Personality);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
+
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+/// AddComment - Add a comment that can be emitted to the generated .s
+/// file if applicable as a QoI issue to make the output of the compiler
+/// more readable. This only affects the MCAsmStreamer, and only when
+/// verbose assembly output is enabled.
+void MCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
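+
+// Illustrative use (with the default MCAsmInfo comment settings defined
+// above, i.e. comment string "#" and comment column 40): a call such as
+// Streamer.AddComment("frame setup") makes the next emitted line end with
+// "# frame setup" padded out to column 40.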
+
+void MCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
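+
+// For example, truncateToSize(0x1234, 1) yields 0x34 and
+// truncateToSize(-1, 2) yields 0xFFFF.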
+
+void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+ Section->PrintSwitchToSection(MAI, OS);
+}
+
+void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ if (UseCFI)
+ return;
+
+ unsigned Flags = FlagMap.lookup(Symbol);
+
+ if (Flags & EHGlobal)
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (Flags & EHWeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (Flags & EHPrivateExtern)
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCStreamer::EmitLabel(Symbol);
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: assert(0 && "Invalid flag!");
+ case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
+ case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
+ case MCAF_Code16: OS << "\t.code\t16"; break;
+ case MCAF_Code32: OS << "\t.code\t32"; break;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // This needs to emit to a temporary string to get properly quoted
+ // MCSymbols when they have spaces in them.
+ OS << "\t.thumb_func";
+ // Only Mach-O hasSubsectionsViaSymbols()
+ if (MAI.hasSubsectionsViaSymbols())
+ OS << '\t' << *Func;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ EmitIntValue(dwarf::DW_CFA_advance_loc4, 1);
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ AddrDelta = ForceExpAbs(AddrDelta);
+ EmitValue(AddrDelta, 4);
+}
+
+
+void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ switch (Attribute) {
+ case MCSA_Invalid: assert(0 && "Invalid symbol attribute");
+ case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function
+ case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC
+ case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object
+ case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object
+ case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common
+ case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype
+ case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
+ assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
+ OS << "\t.type\t" << *Symbol << ','
+ << ((MAI.getCommentString()[0] != '@') ? '@' : '%');
+ switch (Attribute) {
+ default: assert(0 && "Unknown ELF .type");
+ case MCSA_ELF_TypeFunction: OS << "function"; break;
+ case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
+ case MCSA_ELF_TypeObject: OS << "object"; break;
+ case MCSA_ELF_TypeTLS: OS << "tls_object"; break;
+ case MCSA_ELF_TypeCommon: OS << "common"; break;
+ case MCSA_ELF_TypeNoType: OS << "no_type"; break;
+ case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
+ }
+ EmitEOL();
+ return;
+ case MCSA_Global: // .globl/.global
+ OS << MAI.getGlobalDirective();
+ FlagMap[Symbol] |= EHGlobal;
+ break;
+ case MCSA_Hidden: OS << "\t.hidden\t"; break;
+ case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
+ case MCSA_Internal: OS << "\t.internal\t"; break;
+ case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break;
+ case MCSA_Local: OS << "\t.local\t"; break;
+ case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break;
+ case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
+ case MCSA_PrivateExtern:
+ OS << "\t.private_extern\t";
+ FlagMap[Symbol] |= EHPrivateExtern;
+ break;
+ case MCSA_Protected: OS << "\t.protected\t"; break;
+ case MCSA_Reference: OS << "\t.reference\t"; break;
+ case MCSA_Weak: OS << "\t.weak\t"; break;
+ case MCSA_WeakDefinition:
+ OS << "\t.weak_definition\t";
+ FlagMap[Symbol] |= EHWeakDefinition;
+ break;
+ // .weak_reference
+ case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break;
+ case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
+ }
+
+ OS << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ OS << ".desc" << ' ' << *Symbol << ',' << DescValue;
+ EmitEOL();
+}
+
+void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ OS << "\t.def\t " << *Symbol << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ OS << "\t.scl\t" << StorageClass << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolType(int Type) {
+ OS << "\t.type\t" << Type << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EndCOFFSymbolDef() {
+ OS << "\t.endef";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ assert(MAI.hasDotTypeDotSizeDirective());
+ OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
+}
+
+void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ OS << "\t.comm\t" << *Symbol << ',' << Size;
+ if (ByteAlignment != 0) {
+ if (MAI.getCOMMDirectiveAlignmentIsInBytes())
+ OS << ',' << ByteAlignment;
+ else
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+///
+/// @param Symbol - The common symbol to emit.
+/// @param Size - The size of the common symbol.
+void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!");
+ OS << "\t.lcomm\t" << *Symbol << ',' << Size;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ // Note: a .zerofill directive does not switch sections.
+ OS << ".zerofill ";
+
+ // This is a mach-o specific directive.
+ const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
+ OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
+
+ if (Symbol != NULL) {
+ OS << ',' << *Symbol << ',' << Size;
+ if (ByteAlignment != 0)
+ OS << ',' << Log2_32(ByteAlignment);
+ }
+ EmitEOL();
+}
+
+// .tbss sym, size, align
+// This assumes the symbol has already been mangled from the original name,
+// e.g. _a.
+void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+ // Instead of using the Section we'll just use the shortcut.
+ // This is a mach-o specific directive and section.
+ OS << ".tbss " << *Symbol << ", " << Size;
+
+ // Output align if we have it. We default to 1 so don't bother printing
+ // that.
+ if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
+
+ EmitEOL();
+}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
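+
+// For example, PrintQuotedString escapes a double quote as \", a newline as
+// \n, and a non-printable byte such as 0x01 as the octal escape \001, and
+// wraps the whole string in double quotes.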
+
+
+void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it; otherwise
+ // use .ascii.
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
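+
+// For example, with the default ZeroDirective above ("\t.zero\t"),
+// EmitFill(8, 0, 0) emits a single ".zero 8" directive, and a non-zero fill
+// value such as 0x90 is appended as ".zero 8,144".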
+
+void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
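+
+// For example, with the default MCAsmInfo settings above (AlignDirective
+// "\t.align\t", AlignmentIsInBytes true) EmitValueToAlignment(16) emits
+// ".align 16"; a target with AlignmentIsInBytes false (e.g. the Darwin
+// settings above) emits ".align 4"; and a non-power-of-two request such as
+// 12 falls through to ".balign 12, 0".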
+
+void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // Emit with a text fill value.
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+ 1, MaxBytesToEmit);
+}
+
+void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ // FIXME: Verify that Offset is associated with the current section.
+ OS << ".org " << *Offset << ", " << (unsigned) Value;
+ EmitEOL();
+}
+
+
+void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+ if (UseLoc) {
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ }
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator, FileName);
+ if (!UseLoc)
+ return;
+
+ OS << "\t.loc\t" << FileNo << " " << Line << " " << Column;
+ if (Flags & DWARF2_FLAG_BASIC_BLOCK)
+ OS << " basic_block";
+ if (Flags & DWARF2_FLAG_PROLOGUE_END)
+ OS << " prologue_end";
+ if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN)
+ OS << " epilogue_begin";
+
+ unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags();
+ if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) {
+ OS << " is_stmt ";
+
+ if (Flags & DWARF2_FLAG_IS_STMT)
+ OS << "1";
+ else
+ OS << "0";
+ }
+
+ if (Isa)
+ OS << "isa " << Isa;
+ if (Discriminator)
+ OS << "discriminator " << Discriminator;
+
+ if (IsVerboseAsm) {
+ OS.PadToColumn(MAI.getCommentColumn());
+ OS << MAI.getCommentString() << ' ' << FileName << ':'
+ << Line << ':' << Column;
+ }
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
+ MCStreamer::EmitCFISections(EH, Debug);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_sections ";
+ if (EH) {
+ OS << ".eh_frame";
+ if (Debug)
+ OS << ", .debug_frame";
+ } else if (Debug) {
+ OS << ".debug_frame";
+ }
+
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIStartProc() {
+ MCStreamer::EmitCFIStartProc();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_startproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIEndProc() {
+ MCStreamer::EmitCFIEndProc();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegisterName(int64_t Register) {
+ if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true);
+ InstPrinter->printRegName(OS, LLVMRegister);
+ } else {
+ OS << Register;
+ }
+}
+
+void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIDefCfa(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ MCStreamer::EmitCFIDefCfaOffset(Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_offset " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ MCStreamer::EmitCFIDefCfaRegister(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_def_cfa_register ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ this->MCStreamer::EmitCFIOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ MCStreamer::EmitCFIPersonality(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ MCStreamer::EmitCFILsda(Sym, Encoding);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRememberState() {
+ MCStreamer::EmitCFIRememberState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_remember_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRestoreState() {
+ MCStreamer::EmitCFIRestoreState();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_restore_state";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
+ MCStreamer::EmitCFISameValue(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_same_value ";
+ EmitRegisterName(Register);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ MCStreamer::EmitCFIRelOffset(Register, Offset);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_rel_offset ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ MCStreamer::EmitCFIAdjustCfaOffset(Adjustment);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCStreamer::EmitWin64EHStartProc(Symbol);
+
+ OS << ".seh_proc " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProc() {
+ MCStreamer::EmitWin64EHEndProc();
+
+ OS << "\t.seh_endproc";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHStartChained() {
+ MCStreamer::EmitWin64EHStartChained();
+
+ OS << "\t.seh_startchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndChained() {
+ MCStreamer::EmitWin64EHEndChained();
+
+ OS << "\t.seh_endchained";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ MCStreamer::EmitWin64EHHandler(Sym, Unwind, Except);
+
+ OS << "\t.seh_handler " << *Sym;
+ if (Unwind)
+ OS << ", @unwind";
+ if (Except)
+ OS << ", @except";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // Switch sections. Don't call SwitchSection directly, because that will
+ // cause the section switch to be visible in the emitted assembly.
+ // We only do this so the section switch that terminates the handler
+ // data block is visible.
+ MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo();
+ StringRef suffix = MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function);
+ const MCSection *xdataSect =
+ getContext().getTargetAsmInfo().getWin64EHTableSection(suffix);
+ if (xdataSect)
+ SwitchSectionNoChange(xdataSect);
+
+ OS << "\t.seh_handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
+ MCStreamer::EmitWin64EHPushReg(Register);
+
+ OS << "\t.seh_pushreg " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSetFrame(Register, Offset);
+
+ OS << "\t.seh_setframe " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ MCStreamer::EmitWin64EHAllocStack(Size);
+
+ OS << "\t.seh_stackalloc " << Size;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveReg(Register, Offset);
+
+ OS << "\t.seh_savereg " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
+
+ OS << "\t.seh_savexmm " << Register << ", " << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) {
+ MCStreamer::EmitWin64EHPushFrame(Code);
+
+ OS << "\t.seh_pushframe";
+ if (Code)
+ OS << " @code";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitWin64EHEndProlog(void) {
+ MCStreamer::EmitWin64EHEndProlog();
+
+ OS << "\t.seh_endprologue";
+ EmitEOL();
+}
+
+void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
+ raw_ostream &OS = GetCommentOS();
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ Emitter->EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // If we are showing fixups, create symbolic markers in the encoded
+ // representation. We do this by making a per-bit map to the fixup item index,
+ // then trying to display it as nicely as possible.
+ SmallVector<uint8_t, 64> FixupMap;
+ FixupMap.resize(Code.size() * 8);
+ for (unsigned i = 0, e = Code.size() * 8; i != e; ++i)
+ FixupMap[i] = 0;
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ for (unsigned j = 0; j != Info.TargetSize; ++j) {
+ unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
+ assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
+ FixupMap[Index] = 1 + i;
+ }
+ }
+
+ // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
+ // high order halfword of a 32-bit Thumb2 instruction is emitted first.
+ OS << "encoding: [";
+ for (unsigned i = 0, e = Code.size(); i != e; ++i) {
+ if (i)
+ OS << ',';
+
+ // See if all bits are the same map entry.
+ uint8_t MapEntry = FixupMap[i * 8 + 0];
+ for (unsigned j = 1; j != 8; ++j) {
+ if (FixupMap[i * 8 + j] == MapEntry)
+ continue;
+
+ MapEntry = uint8_t(~0U);
+ break;
+ }
+
+ if (MapEntry != uint8_t(~0U)) {
+ if (MapEntry == 0) {
+ OS << format("0x%02x", uint8_t(Code[i]));
+ } else {
+ if (Code[i]) {
+ // FIXME: Some of the 8 bits require a fixup.
+ OS << format("0x%02x", uint8_t(Code[i])) << '\''
+ << char('A' + MapEntry - 1) << '\'';
+ } else
+ OS << char('A' + MapEntry - 1);
+ }
+ } else {
+ // Otherwise, write out in binary.
+ OS << "0b";
+ for (unsigned j = 8; j--;) {
+ unsigned Bit = (Code[i] >> j) & 1;
+
+ unsigned FixupBit;
+ if (getContext().getAsmInfo().isLittleEndian())
+ FixupBit = i * 8 + j;
+ else
+ FixupBit = i * 8 + (7-j);
+
+ if (uint8_t MapEntry = FixupMap[FixupBit]) {
+ assert(Bit == 0 && "Encoder wrote into fixed up bit!");
+ OS << char('A' + MapEntry - 1);
+ } else
+ OS << Bit;
+ }
+ }
+ }
+ OS << "]\n";
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
+ OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
+ << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
+ }
+}
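+
+// Illustrative note, not part of the upstream code: the per-bit FixupMap used
+// above yields encoding comments of roughly the following shape (instruction,
+// bytes and fixup kind are hypothetical):
+//
+//   addl %eax, sym(%rip)    ## encoding: [0x01,0x05,A,A,A,A]
+//                           ##   fixup A - offset: 2, value: sym, kind: reloc_riprel_4byte
+//
+// A byte whose eight bits all belong to one fixup prints as that fixup's
+// letter; a byte mixing fixed bits and fixup bits falls back to binary, with
+// the fixup bits shown as letters (e.g. 0b0101AAAA).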
+
+void MCAsmStreamer::EmitFnStart() {
+ OS << "\t.fnstart";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitFnEnd() {
+ OS << "\t.fnend";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCantUnwind() {
+ OS << "\t.cantunwind";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitHandlerData() {
+ OS << "\t.handlerdata";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName();
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter->printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter->printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter->printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter->printRegName(OS, RegList[i]);
+ }
+
+ OS << "}";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS);
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void MCAsmStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles() && !UseLoc)
+ MCDwarfFileTable::Emit(this);
+
+ if (!UseCFI)
+ EmitFrames(false);
+}
+
+MCStreamer *llvm::createAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI, MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
+ return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
+ IP, CE, TAB, ShowInst);
+}
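+
+// Illustrative sketch, not part of the upstream code: one way a client might
+// drive createAsmStreamer(). Ctx, IP, CE and TAB are assumed to have been
+// obtained from a TargetRegistry lookup elsewhere; all names here are
+// hypothetical.
+//
+//   formatted_raw_ostream FOS(llvm::outs());
+//   OwningPtr<MCStreamer> Streamer(
+//       llvm::createAsmStreamer(Ctx, FOS, /*isVerboseAsm=*/true,
+//                               /*useLoc=*/true, /*useCFI=*/true,
+//                               IP, CE, TAB, /*ShowInst=*/false));
+//   Streamer->InitSections();
+//   Streamer->EmitIntValue(42, 4, /*AddrSpace=*/0);
+//   Streamer->Finish();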
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
new file mode 100644
index 000000000000..527a63caaee5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -0,0 +1,977 @@
+//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "assembler"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
+STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(FragmentLayouts, "Number of fragment layouts");
+STATISTIC(ObjectBytes, "Number of emitted object file bytes");
+STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
+STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
+}
+}
+
+// FIXME FIXME FIXME: There are a number of places in this file where we convert
+// what is a 64-bit assembler value used for computation into a value in the
+// object file, which may truncate it. We should detect that truncation where
+// invalid and report errors back.
+
+/* *** */
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+ : Assembler(Asm), LastValidFragment()
+ {
+ // Compute the section layout order. Virtual sections must go last.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (!it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (it->getSection().isVirtualSection())
+ SectionOrder.push_back(&*it);
+}
+
+bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
+ const MCSectionData &SD = *F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+ if (!LastValid)
+ return false;
+ assert(LastValid->getParent() == F->getParent());
+ return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::Invalidate(MCFragment *F) {
+ // If this fragment wasn't already up-to-date, we don't need to do anything.
+ if (!isFragmentUpToDate(F))
+ return;
+
+ // Otherwise, reset the last valid fragment to this fragment.
+ const MCSectionData &SD = *F->getParent();
+ LastValidFragment[&SD] = F;
+}
+
+void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+ MCSectionData &SD = *F->getParent();
+
+ MCFragment *Cur = LastValidFragment[&SD];
+ if (!Cur)
+ Cur = &*SD.begin();
+ else
+ Cur = Cur->getNextNode();
+
+ // Advance the layout position until the fragment is up-to-date.
+ while (!isFragmentUpToDate(F)) {
+ const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
+ Cur = Cur->getNextNode();
+ }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+ EnsureValid(F);
+ assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+ return F->Offset;
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Offset = Target.getConstant();
+ if (Target.getSymA())
+ Offset += getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymA()->getSymbol()));
+ if (Target.getSymB())
+ Offset -= getSymbolOffset(&Assembler.getSymbolData(
+ Target.getSymB()->getSymbol()));
+ return Offset;
+ }
+
+ assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
+ return getFragmentOffset(SD->getFragment()) + SD->getOffset();
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+ // The size is the last fragment's end offset.
+ const MCFragment &F = SD->getFragmentList().back();
+ return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
+ // Virtual sections have no file size.
+ if (SD->getSection().isVirtualSection())
+ return 0;
+
+ // Otherwise, the file size is the same as the address space size.
+ return getSectionAddressSize(SD);
+}
+
+/* *** */
+
+MCFragment::MCFragment() : Kind(FragmentType(~0)) {
+}
+
+MCFragment::~MCFragment() {
+}
+
+MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
+{
+ if (Parent)
+ Parent->getFragmentList().push_back(this);
+}
+
+/* *** */
+
+MCSectionData::MCSectionData() : Section(0) {}
+
+MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
+ : Section(&_Section),
+ Ordinal(~UINT32_C(0)),
+ Alignment(1),
+ HasInstructions(false)
+{
+ if (A)
+ A->getSectionList().push_back(this);
+}
+
+/* *** */
+
+MCSymbolData::MCSymbolData() : Symbol(0) {}
+
+MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
+ uint64_t _Offset, MCAssembler *A)
+ : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
+ IsExternal(false), IsPrivateExtern(false),
+ CommonSize(0), SymbolSize(0), CommonAlign(0),
+ Flags(0), Index(0)
+{
+ if (A)
+ A->getSymbolList().push_back(this);
+}
+
+/* *** */
+
+MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+ raw_ostream &OS_)
+ : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+ OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false)
+{
+}
+
+MCAssembler::~MCAssembler() {
+}
+
+bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
+ // Non-temporary labels should always be visible to the linker.
+ if (!Symbol.isTemporary())
+ return true;
+
+ // Absolute temporary labels are never visible.
+ if (!Symbol.isInSection())
+ return false;
+
+ // Otherwise, check if the section requires symbols even for temporary labels.
+ return getBackend().doesSectionRequireSymbols(Symbol.getSection());
+}
+
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+ // Linker visible symbols define atoms.
+ if (isSymbolLinkerVisible(SD->getSymbol()))
+ return SD;
+
+ // Absolute and undefined symbols have no defining atom.
+ if (!SD->getFragment())
+ return 0;
+
+ // Non-linker visible symbols in sections which can't be atomized have no
+ // defining atom.
+ if (!getBackend().isSectionAtomizable(
+ SD->getFragment()->getParent()->getSection()))
+ return 0;
+
+ // Otherwise, return the atom for the containing fragment.
+ return SD->getFragment()->getAtom();
+}
+
+bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value) const {
+ ++stats::EvaluateFixup;
+
+ if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("expected relocatable expression");
+
+ bool IsPCRel = Backend.getFixupKindInfo(
+ Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
+
+ bool IsResolved;
+ if (IsPCRel) {
+ if (Target.getSymB()) {
+ IsResolved = false;
+ } else if (!Target.getSymA()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol &SA = A->getSymbol();
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ SA.AliasedSymbol().isUndefined()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolData &DataA = getSymbolData(SA);
+ IsResolved =
+ getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
+ *DF, false, true);
+ }
+ }
+ } else {
+ IsResolved = Target.isAbsolute();
+ }
+
+ Value = Target.getConstant();
+
+ bool IsThumb = false;
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+ if (isThumbFunc(&Sym))
+ IsThumb = true;
+ }
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
+ }
+
+
+ bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+ assert((ShouldAlignPC ? IsPCRel : true) &&
+ "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
+
+ if (IsPCRel) {
+ uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+
+ // A number of ARM fixups in Thumb mode require that the effective PC
+ // address be determined as the 32-bit aligned version of the actual offset.
+ if (ShouldAlignPC) Offset &= ~0x3;
+ Value -= Offset;
+ }
+
+ // ARM fixups based from a thumb function address need to have the low
+ // bit set. The actual value is always at least 16-bit aligned, so the
+ // low bit is normally clear and available for use as an ISA flag for
+ // interworking.
+ if (IsThumb)
+ Value |= 1;
+
+ return IsResolved;
+}
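+
+// Worked example (illustrative only, all values hypothetical): for a
+// PC-relative fixup at offset 1 of a fragment laid out at offset 0x10,
+// referencing a defined symbol laid out at offset 0x40 with a constant
+// addend of -4, the code above computes
+//
+//   Value = -4 + 0x40          // constant plus the SymA offset
+//         - (0x10 + 1)         // minus (fragment offset + fixup offset)
+//         = 0x2b
+//
+// IsResolved is then true only if the writer can show the result is
+// independent of where the object is finally loaded.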
+
+uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const {
+ switch (F.getKind()) {
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(F).getContents().size();
+ case MCFragment::FT_Fill:
+ return cast<MCFillFragment>(F).getSize();
+ case MCFragment::FT_Inst:
+ return cast<MCInstFragment>(F).getInstSize();
+
+ case MCFragment::FT_LEB:
+ return cast<MCLEBFragment>(F).getContents().size();
+
+ case MCFragment::FT_Align: {
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ unsigned Offset = Layout.getFragmentOffset(&AF);
+ unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+ if (Size > AF.getMaxBytesToEmit())
+ return 0;
+ return Size;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+ int64_t TargetLocation;
+ if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
+ report_fatal_error("expected assembly-time absolute expression");
+
+ // FIXME: We need a way to communicate this error.
+ uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+ int64_t Size = TargetLocation - FragmentOffset;
+ if (Size < 0 || Size >= 0x40000000)
+ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+ "' (at offset '" + Twine(FragmentOffset) + "')");
+ return Size;
+ }
+
+ case MCFragment::FT_Dwarf:
+ return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+ case MCFragment::FT_DwarfFrame:
+ return cast<MCDwarfCallFrameFragment>(F).getContents().size();
+ }
+
+ assert(0 && "invalid fragment kind");
+ return 0;
+}
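+
+// Worked example (illustrative only): an FT_Align fragment requesting 8-byte
+// alignment at layout offset 5 gets OffsetToAlignment(5, 8) == 3 bytes of
+// padding. If its MaxBytesToEmit were 2, the required padding (3) exceeds the
+// limit and the case above reports a size of 0 instead.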
+
+void MCAsmLayout::LayoutFragment(MCFragment *F) {
+ MCFragment *Prev = F->getPrevNode();
+
+ // We should never try to recompute something which is up-to-date.
+ assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
+ // We should never try to compute the fragment layout if its predecessor
+ // isn't up-to-date.
+ assert((!Prev || isFragmentUpToDate(Prev)) &&
+ "Attempt to compute fragment before its predecessor!");
+
+ ++stats::FragmentLayouts;
+
+ // Compute fragment offset and size.
+ uint64_t Offset = 0;
+ if (Prev)
+ Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+
+ F->Offset = Offset;
+ LastValidFragment[F->getParent()] = F;
+}
+
+/// WriteFragmentData - Write the \arg F data to the output file.
+static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
+ uint64_t Start = OW->getStream().tell();
+ (void) Start;
+
+ ++stats::EmittedFragments;
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ switch (F.getKind()) {
+ case MCFragment::FT_Align: {
+ MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ uint64_t Count = FragmentSize / AF.getValueSize();
+
+ assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ // FIXME: This error shouldn't actually occur (the front end should emit
+ // multiple .align directives to enforce the semantics it wants), but is
+ // severe enough that we want to report it. How to handle this?
+ if (Count * AF.getValueSize() != FragmentSize)
+ report_fatal_error("undefined .align directive, value size '" +
+ Twine(AF.getValueSize()) +
+ "' is not a divisor of padding size '" +
+ Twine(FragmentSize) + "'");
+
+ // If we are aligning with nops, ask the target to emit the right nop
+ // sequence to fill the Count bytes; it is a fatal error if it cannot.
+ // Otherwise, fall through and fill with Value/ValueSize below.
+ if (AF.hasEmitNops()) {
+ if (!Asm.getBackend().WriteNopData(Count, OW))
+ report_fatal_error("unable to write nop sequence of " +
+ Twine(Count) + " bytes");
+ break;
+ }
+
+ // Otherwise, write out in multiples of the value size.
+ for (uint64_t i = 0; i != Count; ++i) {
+ switch (AF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: OW->Write8 (uint8_t (AF.getValue())); break;
+ case 2: OW->Write16(uint16_t(AF.getValue())); break;
+ case 4: OW->Write32(uint32_t(AF.getValue())); break;
+ case 8: OW->Write64(uint64_t(AF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Data: {
+ MCDataFragment &DF = cast<MCDataFragment>(F);
+ assert(FragmentSize == DF.getContents().size() && "Invalid size!");
+ OW->WriteBytes(DF.getContents().str());
+ break;
+ }
+
+ case MCFragment::FT_Fill: {
+ MCFillFragment &FF = cast<MCFillFragment>(F);
+
+ assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+ for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
+ switch (FF.getValueSize()) {
+ default:
+ assert(0 && "Invalid size!");
+ case 1: OW->Write8 (uint8_t (FF.getValue())); break;
+ case 2: OW->Write16(uint16_t(FF.getValue())); break;
+ case 4: OW->Write32(uint32_t(FF.getValue())); break;
+ case 8: OW->Write64(uint64_t(FF.getValue())); break;
+ }
+ }
+ break;
+ }
+
+ case MCFragment::FT_Inst: {
+ MCInstFragment &IF = cast<MCInstFragment>(F);
+ OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
+ break;
+ }
+
+ case MCFragment::FT_LEB: {
+ MCLEBFragment &LF = cast<MCLEBFragment>(F);
+ OW->WriteBytes(LF.getContents().str());
+ break;
+ }
+
+ case MCFragment::FT_Org: {
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+ for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
+ OW->Write8(uint8_t(OF.getValue()));
+
+ break;
+ }
+
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+ OW->WriteBytes(OF.getContents().str());
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+ OW->WriteBytes(CF.getContents().str());
+ break;
+ }
+ }
+
+ assert(OW->getStream().tell() - Start == FragmentSize);
+}
+
+void MCAssembler::WriteSectionData(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ // Ignore virtual sections.
+ if (SD->getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+ // Check that contents are only things legal inside a virtual section.
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it) {
+ switch (it->getKind()) {
+ default:
+ assert(0 && "Invalid fragment in virtual section!");
+ case MCFragment::FT_Data: {
+ // Check that we aren't trying to write non-zero contents (or fixups)
+ // into a virtual section. This is to support clients which use standard
+ // directives to fill the contents of virtual sections.
+ MCDataFragment &DF = cast<MCDataFragment>(*it);
+ assert(DF.fixup_begin() == DF.fixup_end() &&
+ "Cannot have fixups in virtual section!");
+ for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+ assert(DF.getContents()[i] == 0 &&
+ "Invalid data value for virtual section!");
+ break;
+ }
+ case MCFragment::FT_Align:
+ // Check that we aren't trying to write a non-zero value into a virtual
+ // section.
+ assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+ !cast<MCAlignFragment>(it)->getValue()) &&
+ "Invalid align in virtual section!");
+ break;
+ case MCFragment::FT_Fill:
+ assert(!cast<MCFillFragment>(it)->getValueSize() &&
+ "Invalid fill in virtual section!");
+ break;
+ }
+ }
+
+ return;
+ }
+
+ uint64_t Start = getWriter().getStream().tell();
+ (void) Start;
+
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it)
+ WriteFragmentData(*this, Layout, *it);
+
+ assert(getWriter().getStream().tell() - Start ==
+ Layout.getSectionAddressSize(SD));
+}
+
+
+uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+ MCFragment &F,
+ const MCFixup &Fixup) {
+ // Evaluate the fixup.
+ MCValue Target;
+ uint64_t FixedValue;
+ if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ // The fixup was unresolved, we need a relocation. Inform the object
+ // writer of the relocation, and give it an opportunity to adjust the
+ // fixup value if need be.
+ getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+ }
+ return FixedValue;
+ }
+
+void MCAssembler::Finish() {
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - pre-layout\n--\n";
+ dump(); });
+
+ // Create the layout object.
+ MCAsmLayout Layout(*this);
+
+ // Create dummy fragments and assign section ordinals.
+ unsigned SectionIndex = 0;
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ // Create dummy fragments to eliminate any empty sections; this simplifies
+ // layout.
+ if (it->getFragmentList().empty())
+ new MCDataFragment(it);
+
+ it->setOrdinal(SectionIndex++);
+ }
+
+ // Assign layout order indices to sections and fragments.
+ for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+ SD->setLayoutOrder(i);
+
+ unsigned FragmentIndex = 0;
+ for (MCSectionData::iterator it2 = SD->begin(),
+ ie2 = SD->end(); it2 != ie2; ++it2)
+ it2->setLayoutOrder(FragmentIndex++);
+ }
+
+ // Layout until everything fits.
+ while (LayoutOnce(Layout))
+ continue;
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - post-relaxation\n--\n";
+ dump(); });
+
+ // Finalize the layout, including fragment lowering.
+ FinishLayout(Layout);
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - final-layout\n--\n";
+ dump(); });
+
+ uint64_t StartOffset = OS.tell();
+
+ // Allow the object writer a chance to perform post-layout binding (for
+ // example, to set the index fields in the symbol data).
+ getWriter().ExecutePostLayoutBinding(*this, Layout);
+
+ // Evaluate and apply the fixups, generating relocation entries as necessary.
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
+ if (DF) {
+ for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+ ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+ getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+ DF->getContents().size(), FixedValue);
+ }
+ }
+ MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+ if (IF) {
+ for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
+ ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+ getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+ IF->getCode().size(), FixedValue);
+ }
+ }
+ }
+ }
+
+ // Write the object file.
+ getWriter().WriteObject(*this, Layout);
+
+ stats::ObjectBytes += OS.tell() - StartOffset;
+}
+
+bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCFragment *DF,
+ const MCAsmLayout &Layout) const {
+ if (getRelaxAll())
+ return true;
+
+ // If we cannot resolve the fixup value, it requires relaxation.
+ MCValue Target;
+ uint64_t Value;
+ if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ return true;
+
+ // Otherwise, relax if the value is too big for a (signed) i8.
+ //
+ // FIXME: This is target dependent!
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+ const MCAsmLayout &Layout) const {
+ // If this inst doesn't ever need relaxation, ignore it. This occurs when we
+ // are intentionally pushing out inst fragments, or because we relaxed a
+ // previous instruction to one that doesn't need relaxation.
+ if (!getBackend().MayNeedRelaxation(IF->getInst()))
+ return false;
+
+ for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
+ ie = IF->fixup_end(); it != ie; ++it)
+ if (FixupNeedsRelaxation(*it, IF, Layout))
+ return true;
+
+ return false;
+}
+
+bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+ MCInstFragment &IF) {
+ if (!FragmentNeedsRelaxation(&IF, Layout))
+ return false;
+
+ ++stats::RelaxedInstructions;
+
+ // FIXME-PERF: We could immediately lower out instructions if we can tell
+ // they are fully resolved, to avoid retesting on later passes.
+
+ // Relax the fragment.
+
+ MCInst Relaxed;
+ getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+
+ // Encode the new instruction.
+ //
+ // FIXME-PERF: If it matters, we could let the target do this. It can
+ // probably do so more efficiently in many cases.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+ VecOS.flush();
+
+ // Update the instruction fragment.
+ IF.setInst(Relaxed);
+ IF.getCode() = Code;
+ IF.getFixups().clear();
+ // FIXME: Eliminate copy.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ IF.getFixups().push_back(Fixups[i]);
+
+ return true;
+}
+
+bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+ int64_t Value = 0;
+ uint64_t OldSize = LF.getContents().size();
+ bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = LF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ if (LF.isSigned())
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ else
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ OSE.flush();
+ return OldSize != LF.getContents().size();
+}
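+
+// Illustrative note, not part of the upstream code: LEB relaxation must be
+// iterated because the encoded size feeds back into layout. For example,
+//
+//   MCObjectWriter::EncodeULEB128(127, OS);  // 1 byte:  0x7f
+//   MCObjectWriter::EncodeULEB128(128, OS);  // 2 bytes: 0x80 0x01
+//
+// so if relaxing other fragments pushes the evaluated value across such a
+// boundary, RelaxLEB() reports a size change and LayoutSectionOnce() runs
+// another pass over the section.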
+
+bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ int64_t LineDelta;
+ LineDelta = DF.getLineDelta();
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ MCDwarfCallFrameFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+ MCSectionData &SD) {
+ MCFragment *FirstInvalidFragment = NULL;
+ // Scan for fragments that need relaxation.
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ // Check if this is a fragment that needs relaxation.
+ bool relaxedFrag = false;
+ switch(it2->getKind()) {
+ default:
+ break;
+ case MCFragment::FT_Inst:
+ relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ break;
+ case MCFragment::FT_Dwarf:
+ relaxedFrag = RelaxDwarfLineAddr(Layout,
+ *cast<MCDwarfLineAddrFragment>(it2));
+ break;
+ case MCFragment::FT_DwarfFrame:
+ relaxedFrag =
+ RelaxDwarfCallFrameFragment(Layout,
+ *cast<MCDwarfCallFrameFragment>(it2));
+ break;
+ case MCFragment::FT_LEB:
+ relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ break;
+ }
+ // Update the layout, and remember that we relaxed.
+ if (relaxedFrag && !FirstInvalidFragment)
+ FirstInvalidFragment = it2;
+ }
+ if (FirstInvalidFragment) {
+ Layout.Invalidate(FirstInvalidFragment);
+ return true;
+ }
+ return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+ ++stats::RelaxationSteps;
+
+ bool WasRelaxed = false;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+ while(LayoutSectionOnce(Layout, SD))
+ WasRelaxed = true;
+ }
+
+ return WasRelaxed;
+}
+
+void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
+ }
+}
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+ OS << "<MCFixup" << " Offset:" << AF.getOffset()
+ << " Value:" << *AF.getValue()
+ << " Kind:" << AF.getKind() << ">";
+ return OS;
+}
+
+}
+
+void MCFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<";
+ switch (getKind()) {
+ case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Inst: OS << "MCInstFragment"; break;
+ case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+ case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+ case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+ }
+
+ OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ << " Offset:" << Offset << ">";
+
+ switch (getKind()) {
+ case MCFragment::FT_Align: {
+ const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ if (AF->hasEmitNops())
+ OS << " (emit nops)";
+ OS << "\n ";
+ OS << " Alignment:" << AF->getAlignment()
+ << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+ << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+ break;
+ }
+ case MCFragment::FT_Data: {
+ const MCDataFragment *DF = cast<MCDataFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = DF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+
+ if (!DF->getFixups().empty()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+ ie = DF->fixup_end(); it != ie; ++it) {
+ if (it != DF->fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+ break;
+ }
+ case MCFragment::FT_Fill: {
+ const MCFillFragment *FF = cast<MCFillFragment>(this);
+ OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+ << " Size:" << FF->getSize();
+ break;
+ }
+ case MCFragment::FT_Inst: {
+ const MCInstFragment *IF = cast<MCInstFragment>(this);
+ OS << "\n ";
+ OS << " Inst:";
+ IF->getInst().dump_pretty(OS);
+ break;
+ }
+ case MCFragment::FT_Org: {
+ const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ OS << "\n ";
+ OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ break;
+ }
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta()
+ << " LineDelta:" << OF->getLineDelta();
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << CF->getAddrDelta();
+ break;
+ }
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ OS << "\n ";
+ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ break;
+ }
+ }
+ OS << ">";
+}
+
+void MCSectionData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSectionData";
+ OS << " Alignment:" << getAlignment() << " Fragments:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>";
+}
+
+void MCSymbolData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSymbolData Symbol:" << getSymbol()
+ << " Fragment:" << getFragment() << " Offset:" << getOffset()
+ << " Flags:" << getFlags() << " Index:" << getIndex();
+ if (isCommon())
+ OS << " (common, size:" << getCommonSize()
+ << " align: " << getCommonAlignment() << ")";
+ if (isExternal())
+ OS << " (external)";
+ if (isPrivateExtern())
+ OS << " (private extern)";
+ OS << ">";
+}
+
+void MCAssembler::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAssembler\n";
+ OS << " Sections:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "],\n";
+ OS << " Symbols:[";
+
+ for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ if (it != symbol_begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>\n";
+}
diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
new file mode 100644
index 000000000000..c122763b2fe5
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
@@ -0,0 +1,18 @@
+//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeEmitter.h"
+
+using namespace llvm;
+
+MCCodeEmitter::MCCodeEmitter() {
+}
+
+MCCodeEmitter::~MCCodeEmitter() {
+}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
new file mode 100644
index 000000000000..8faa72ecb4e8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -0,0 +1,315 @@
+//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+
+
+MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
+ MAI(mai), TAI(tai),
+ Allocator(), Symbols(Allocator), UsedNames(Allocator),
+ NextUniqueID(0),
+ CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
+ AllowTemporaryLabels(true) {
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
+
+ SecureLogFile = getenv("AS_SECURE_LOG_FILE");
+ SecureLog = 0;
+ SecureLogUsed = false;
+
+ DwarfLocSeen = false;
+}
+
+MCContext::~MCContext() {
+ // NOTE: The symbols are all allocated out of a bump pointer allocator,
+ // so we don't need to free them here.
+
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)MachOUniquingMap;
+ delete (ELFUniqueMapTy*)ELFUniquingMap;
+ delete (COFFUniqueMapTy*)COFFUniquingMap;
+
+ // If the stream for the .secure_log_unique directive was created, free it.
+ delete (raw_ostream*)SecureLog;
+
+ delete TAI;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Manipulation
+//===----------------------------------------------------------------------===//
+
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
+ assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+
+ // Do the lookup and get the entire StringMapEntry. We want access to the
+ // key if we are creating the entry.
+ StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
+ MCSymbol *Sym = Entry.getValue();
+
+ if (Sym)
+ return Sym;
+
+ Sym = CreateSymbol(Name);
+ Entry.setValue(Sym);
+ return Sym;
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+ // Determine whether this is an assembler temporary or normal label, if used.
+ bool isTemporary = false;
+ if (AllowTemporaryLabels)
+ isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
+ StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
+ if (NameEntry->getValue()) {
+ assert(isTemporary && "Cannot rename non temporary symbols");
+ SmallString<128> NewName = Name;
+ do {
+ NewName.resize(Name.size());
+ raw_svector_ostream(NewName) << NextUniqueID++;
+ NameEntry = &UsedNames.GetOrCreateValue(NewName);
+ } while (NameEntry->getValue());
+ }
+ NameEntry->setValue(true);
+
+ // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
+ // to the copy of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+
+ return Result;
+}
+
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+ SmallString<128> NameSV;
+ Name.toVector(NameSV);
+ return GetOrCreateSymbol(NameSV.str());
+}
+
+MCSymbol *MCContext::CreateTempSymbol() {
+ SmallString<128> NameSV;
+ raw_svector_ostream(NameSV)
+ << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
+ return CreateSymbol(NameSV);
+}
+
+unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->incInstance();
+}
+
+unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->getInstance();
+}
+
+MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(NextInstance(LocalLabelVal)));
+}
+MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
+ int bORf) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(GetInstance(LocalLabelVal) + bORf));
+}
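+
+// Illustrative note, not part of the upstream code: assuming a private prefix
+// of ".L", the second definition of the directional label "1:" creates the
+// symbol named ".L1" "\x02" "2". A backward reference "1b" (bORf == 0) then
+// resolves to that same instance, while a forward reference "1f" (bORf == 1)
+// names instance 3, i.e. the definition that the next "1:" will create.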
+
+MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
+ return Symbols.lookup(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Section Management
+//===----------------------------------------------------------------------===//
+
+const MCSectionMachO *MCContext::
+getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) {
+
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section; if so, this should
+ // be diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (MachOUniquingMap == 0)
+ MachOUniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind);
+}
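+
+// Illustrative note, not part of the upstream code: a hypothetical call such
+// as
+//
+//   Ctx.getMachOSection("__TEXT", "__text",
+//                       MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+//                       0, SectionKind::getText());
+//
+// always returns the same MCSectionMachO* for the key "__TEXT,__text", even
+// if a later call passes different attributes; such mismatches are left for
+// the client to diagnose.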
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ return getELFSection(Section, Type, Flags, Kind, 0, "");
+}
+
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, unsigned EntrySize, StringRef Group) {
+ if (ELFUniquingMap == 0)
+ ELFUniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ // Possibly refine the entry size first.
+ if (!EntrySize) {
+ EntrySize = MCSectionELF::DetermineEntrySize(Kind);
+ }
+
+ MCSymbol *GroupSym = NULL;
+ if (!Group.empty())
+ GroupSym = GetOrCreateSymbol(Group);
+
+ MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
+ Kind, EntrySize, GroupSym);
+ Entry.setValue(Result);
+ return Result;
+}
+
+const MCSectionELF *MCContext::CreateELFGroupSection() {
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
+ SectionKind::getReadOnly(), 4, NULL);
+ return Result;
+}
+
+const MCSection *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ int Selection,
+ SectionKind Kind) {
+ if (COFFUniquingMap == 0)
+ COFFUniquingMap = new COFFUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
+ Characteristics,
+ Selection, Kind);
+
+ Entry.setValue(Result);
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name and number to place in the dwarf file and
+/// directory tables. If the file number has already been allocated, it is an
+/// error and zero is returned; the client reports the error. Otherwise the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+ // TODO: a FileNumber of zero says to use the next available file number.
+ // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+ // to not be less than one. This needs to be changed to be not less than zero.
+
+ // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+ if (FileNumber >= MCDwarfFiles.size()) {
+ MCDwarfFiles.resize(FileNumber + 1);
+ } else {
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ // It is an error to see the same number used more than once.
+ return 0;
+ }
+
+ // Get the new MCDwarfFile slot for this FileNumber.
+ MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+
+ // Find or make an entry in the MCDwarfDirs vector for this Directory.
+ StringRef Name;
+ unsigned DirIndex;
+ // Capture directory name.
+ if (Slash.second.empty()) {
+ Name = Slash.first;
+ DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ } else {
+ StringRef Directory = Slash.first;
+ Name = Slash.second;
+ for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+ if (Directory == MCDwarfDirs[DirIndex])
+ break;
+ }
+ if (DirIndex >= MCDwarfDirs.size()) {
+ char *Buf = static_cast<char *>(Allocate(Directory.size()));
+ memcpy(Buf, Directory.data(), Directory.size());
+ MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+ }
+ // The DirIndex is one-based, as a DirIndex of 0 is used for FileNames with
+ // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that directory
+ // names are stored at MCDwarfDirs[DirIndex-1], whereas FileNames are stored
+ // at MCDwarfFiles[FileNumber].Name.
+ DirIndex++;
+ }
+
+ // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
+ // vector.
+ char *Buf = static_cast<char *>(Allocate(Name.size()));
+ memcpy(Buf, Name.data(), Name.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+
+ // return the allocated FileNumber.
+ return FileNumber;
+}
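+
+// Worked example (illustrative only, paths hypothetical): the directive
+//
+//   .file 2 "/usr/src/foo.c"
+//
+// reaches GetDwarfFile("/usr/src/foo.c", 2). The rsplit('/') above yields the
+// directory "/usr/src" and the basename "foo.c". If "/usr/src" is new it is
+// appended to MCDwarfDirs (say at vector index 0), and MCDwarfFiles[2] is set
+// to {Name: "foo.c", DirIndex: 1}, since DirIndex 0 is reserved for file names
+// with no directory component.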
+
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
+/// currently is assigned and false otherwise.
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
+ if (FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ return false;
+
+ return MCDwarfFiles[FileNumber] != 0;
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp
new file mode 100644
index 000000000000..08096906462f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
new file mode 100644
index 000000000000..5480b4b12b2c
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -0,0 +1,164 @@
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Disassembler.h"
+#include "llvm-c/Disassembler.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h" // FIXME.
+#include "llvm/Target/TargetMachine.h" // FIXME.
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+class Target;
+} // namespace llvm
+using namespace llvm;
+
+// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
+// disassembly is supported by passing a block of information in the DisInfo
+// parameter and specifying the TagType and callback functions as described in
+// the header llvm-c/Disassembler.h. The pointer to the block and the
+// functions can all be passed as NULL. If successful, this returns a
+// disassembler context. If not, it returns NULL.
+//
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
+ int TagType, LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ // Initialize targets and assembly printers/parsers.
+ llvm::InitializeAllTargetInfos();
+ // FIXME: We shouldn't need to initialize the Target(Machine)s.
+ llvm::InitializeAllTargets();
+ llvm::InitializeAllMCAsmInfos();
+ llvm::InitializeAllAsmPrinters();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+
+ // Get the target.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ assert(TheTarget && "Unable to create target!");
+
+ // Get the assembler info needed to setup the MCContext.
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
+ assert(MAI && "Unable to create target asm info!");
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ std::string CPU;
+
+ // FIXME: We shouldn't need to do this (and link in codegen).
+ // When we split this out, we should do it in a way that makes
+ // it straightforward to switch subtargets on the fly.
+ TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU,
+ FeaturesStr);
+ assert(TM && "Unable to create target machine!");
+
+ // Get the target assembler info needed to setup the context.
+ const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
+ assert(tai && "Unable to create target assembler!");
+
+ // Set up the MCContext for creating symbols and MCExpr's.
+ MCContext *Ctx = new MCContext(*MAI, tai);
+ assert(Ctx && "Unable to create MCContext!");
+
+ // Set up disassembler.
+ MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
+ assert(DisAsm && "Unable to create disassembler!");
+ DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx);
+
+ // Set up the instruction printer.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
+ *MAI);
+ assert(IP && "Unable to create instruction printer!");
+
+ LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
+ GetOpInfo, SymbolLookUp,
+ TheTarget, MAI, TM, tai, Ctx,
+ DisAsm, IP);
+ assert(DC && "Allocation failure!");
+ return DC;
+}
+
+//
+// LLVMDisasmDispose() disposes of the disassembler specified by the context.
+//
+void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ delete DC;
+}
+
+namespace {
+//
+// The memory object created by LLVMDisasmInstruction().
+//
+class DisasmMemoryObject : public MemoryObject {
+ uint8_t *Bytes;
+ uint64_t Size;
+ uint64_t BasePC;
+public:
+ DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
+ Bytes(bytes), Size(size), BasePC(basePC) {}
+
+ uint64_t getBase() const { return BasePC; }
+ uint64_t getExtent() const { return Size; }
+
+ int readByte(uint64_t Addr, uint8_t *Byte) const {
+ if (Addr - BasePC >= Size)
+ return -1;
+ *Byte = Bytes[Addr - BasePC];
+ return 0;
+ }
+};
+} // end anonymous namespace
+
+//
+// LLVMDisasmInstruction() disassembles a single instruction using the
+// disassembler context specified in the parameter DC. The bytes of the
+// instruction are specified in the parameter Bytes, which must contain at
+// least BytesSize bytes. The instruction is at the address specified by the
+// PC parameter. If a valid instruction can be disassembled, its string is
+// returned indirectly in OutString, whose size is specified in the parameter
+// OutStringSize. This function returns the number of bytes in the
+// instruction, or zero if there was no valid instruction. If this function
+// returns zero, the caller must decide how many bytes to step over (by
+// emitting a .byte, .long, etc.) to continue.
+//
+size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
+ uint64_t BytesSize, uint64_t PC, char *OutString,
+ size_t OutStringSize){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
+ DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC);
+
+ uint64_t Size;
+ MCInst Inst;
+ const MCDisassembler *DisAsm = DC->getDisAsm();
+ MCInstPrinter *IP = DC->getIP();
+ if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls()))
+ return 0;
+
+ SmallVector<char, 64> InsnStr;
+ raw_svector_ostream OS(InsnStr);
+ IP->printInst(&Inst, OS);
+ OS.flush();
+
+ assert(OutStringSize != 0 && "Output buffer cannot be zero size");
+ size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
+ std::memcpy(OutString, InsnStr.data(), OutputSize);
+ OutString[OutputSize] = '\0'; // Terminate string.
+
+ return Size;
+}
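+
+// Illustrative sketch, not part of the upstream code: a minimal client of the
+// C API implemented in this file. The byte buffer and triple are hypothetical.
+//
+//   uint8_t Bytes[] = { 0x55, 0x48, 0x89, 0xe5 };
+//   char Text[64];
+//   LLVMDisasmContextRef DC =
+//       LLVMCreateDisasm("x86_64-unknown-unknown", 0, 0, 0, 0);
+//   uint64_t Idx = 0;
+//   while (Idx < sizeof(Bytes)) {
+//     size_t Len = LLVMDisasmInstruction(DC, Bytes + Idx, sizeof(Bytes) - Idx,
+//                                        /*PC=*/Idx, Text, sizeof(Text));
+//     if (!Len)
+//       break;  // Caller picks how many bytes to skip past the bad encoding.
+//     Idx += Len;
+//   }
+//   LLVMDisasmDispose(DC);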
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
new file mode 100644
index 000000000000..f0ec42a017a4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h
@@ -0,0 +1,96 @@
+//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Disassembly library's disassembler
+// context. The disassembler is responsible for producing strings for
+// individual instructions according to a given architecture and disassembly
+// syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_DISASSEMBLER_H
+#define LLVM_MC_DISASSEMBLER_H
+
+#include "llvm-c/Disassembler.h"
+#include <string>
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+class TargetAsmInfo;
+class MCContext;
+class MCAsmInfo;
+class MCDisassembler;
+class MCInstPrinter;
+class Target;
+class TargetMachine;
+
+//
+// This is the disassembler context returned by LLVMCreateDisasm().
+//
+class LLVMDisasmContext {
+private:
+ //
+ // The passed parameters when the disassembler context is created.
+ //
+ // The TripleName for this disassembler.
+ std::string TripleName;
+ // The pointer to the caller's block of symbolic information.
+ void *DisInfo;
+ // The Triple specific symbolic information type returned by GetOpInfo.
+ int TagType;
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The function to look up a symbol name.
+ LLVMSymbolLookupCallback SymbolLookUp;
+ //
+ // The objects created and saved by LLVMCreateDisasm() then used by
+ // LLVMDisasmInstruction().
+ //
+ // The LLVM target corresponding to the disassembler.
+ // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error
+ // when this LLVMDisasmContext is deleted.
+ const Target *TheTarget;
+ // The assembly information for the target architecture.
+ llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
+ // The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TM;
+ // The target assembler information for the target architecture.
+ // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc
+ // error when this LLVMDisasmContext is deleted.
+ const TargetAsmInfo *Tai;
+ // The assembly context for creating symbols and MCExprs.
+ llvm::OwningPtr<const llvm::MCContext> Ctx;
+ // The disassembler for the target architecture.
+ llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
+ // The instruction printer for the target architecture.
+ llvm::OwningPtr<llvm::MCInstPrinter> IP;
+
+public:
+ LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
+ LLVMOpInfoCallback getOpInfo,
+ LLVMSymbolLookupCallback symbolLookUp,
+ const Target *theTarget, const MCAsmInfo *mAI,
+ llvm::TargetMachine *tM, const TargetAsmInfo *tai,
+ llvm::MCContext *ctx, const MCDisassembler *disAsm,
+ MCInstPrinter *iP) : TripleName(tripleName),
+ DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
+ SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) {
+ TM.reset(tM);
+ MAI.reset(mAI);
+ Ctx.reset(ctx);
+ DisAsm.reset(disAsm);
+ IP.reset(iP);
+ }
+ const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+ MCInstPrinter *getIP() { return IP.get(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
new file mode 100644
index 000000000000..bdd99afe1ae4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -0,0 +1,419 @@
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap {
+ Triple::ArchType Arch;
+ const char *String;
+};
+
+static struct TripleMap triplemap[] = {
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+ { Triple::InvalidArch, NULL, }
+};
+
+/// tripleFromArch - Returns the target triple string corresponding to a given
+/// architecture, or NULL if the architecture is not supported
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+ unsigned int infoIndex;
+
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
+ }
+
+ return NULL;
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter()
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax, as an EDDisassembler::AssemblySyntax value
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+ EDDisassembler::AssemblySyntax syntax) {
+ switch (syntax) {
+ default:
+ return -1;
+ // Mappings below from X86AsmPrinter.cpp
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 0;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 1;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
+ }
+}
+
+void EDDisassembler::initialize() {
+ if (sInitialized)
+ return;
+
+ sInitialized = true;
+
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllMCAsmInfos();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+ AssemblySyntax syntax) {
+ CPUKey key;
+ key.Arch = arch;
+ key.Syntax = syntax;
+
+ EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+ if (i != sDisassemblers.end()) {
+ return i->second;
+ } else {
+ EDDisassembler* sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
+ }
+
+ return NULL;
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax);
+}
+
+EDDisassembler::EDDisassembler(CPUKey &key) :
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) {
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
+ return;
+
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+ if (LLVMSyntaxVariant < 0)
+ return;
+
+ std::string tripleString(triple);
+ std::string errorString;
+
+ Tgt = TargetRegistry::lookupTarget(tripleString,
+ errorString);
+
+ if (!Tgt)
+ return;
+
+ std::string CPU;
+ std::string featureString;
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
+ featureString));
+
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
+
+ if (!registerInfo)
+ return;
+
+ initMaps(*registerInfo);
+
+ AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
+
+ if (!AsmInfo)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler());
+
+ if (!Disassembler)
+ return;
+
+ InstInfos = Disassembler->getEDInfo();
+
+ InstString.reset(new std::string);
+ InstStream.reset(new raw_string_ostream(*InstString));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+
+ if (!InstPrinter)
+ return;
+
+ GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+ initMaps(*TargetMachine->getRegisterInfo());
+
+ Valid = true;
+}
+
+EDDisassembler::~EDDisassembler() {
+ if (!valid())
+ return;
+}
+
+namespace {
+ /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
+ /// as provided by the ED interface. See MemoryObject.
+ class EDMemoryObject : public llvm::MemoryObject {
+ private:
+ EDByteReaderCallback Callback;
+ void *Arg;
+ public:
+ EDMemoryObject(EDByteReaderCallback callback,
+ void *arg) : Callback(callback), Arg(arg) { }
+ ~EDMemoryObject() { }
+ uint64_t getBase() const { return 0x0; }
+ uint64_t getExtent() const { return (uint64_t)-1; }
+ int readByte(uint64_t address, uint8_t *ptr) const {
+ if (!Callback)
+ return -1;
+
+ if (Callback(ptr, address, Arg))
+ return -1;
+
+ return 0;
+ }
+ };
+}
+
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ EDMemoryObject memoryObject(byteReader, arg);
+
+ MCInst* inst = new MCInst;
+ uint64_t byteSize;
+
+ if (!Disassembler->getInstruction(*inst,
+ byteSize,
+ memoryObject,
+ address,
+ ErrorStream)) {
+ delete inst;
+ return NULL;
+ } else {
+ const llvm::EDInstInfo *thisInstInfo = NULL;
+
+ if (InstInfos) {
+ thisInstInfo = &InstInfos[inst->getOpcode()];
+ }
+
+ EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
+ return sdInst;
+ }
+}
+
+void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+ unsigned numRegisters = registerInfo.getNumRegs();
+ unsigned registerIndex;
+
+ for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
+ const char* registerName = registerInfo.get(registerIndex).Name;
+
+ RegVec.push_back(registerName);
+ RegRMap[registerName] = registerIndex;
+ }
+
+ switch (Key.Arch) {
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ stackPointers.insert(registerIDWithName("SP"));
+ stackPointers.insert(registerIDWithName("ESP"));
+ stackPointers.insert(registerIDWithName("RSP"));
+
+ programCounters.insert(registerIDWithName("IP"));
+ programCounters.insert(registerIDWithName("EIP"));
+ programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
+ }
+}
+
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+ if (registerID >= RegVec.size())
+ return NULL;
+ else
+ return RegVec[registerID].c_str();
+}
+
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+ regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
+ if (iter == RegRMap.end())
+ return 0;
+ else
+ return (*iter).second;
+}
+
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+ return (stackPointers.find(registerID) != stackPointers.end());
+}
+
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+ return (programCounters.find(registerID) != programCounters.end());
+}
+
+int EDDisassembler::printInst(std::string &str, MCInst &inst) {
+ PrinterMutex.acquire();
+
+ InstPrinter->printInst(&inst, *InstStream);
+ InstStream->flush();
+ str = *InstString;
+ InstString->clear();
+
+ PrinterMutex.release();
+
+ return 0;
+}
+
+static void diag_handler(const SMDiagnostic &diag,
+ void *context)
+{
+ if (context) {
+ EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
+ diag.Print("", disassembler->ErrorStream);
+ }
+}
+
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ switch (Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ SourceMgr sourceMgr;
+ sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context(*AsmInfo, NULL);
+ OwningPtr<MCStreamer> streamer(createNullStreamer(context));
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+
+ StringRef triple = tripleFromArch(Key.Arch);
+ OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
+ *genericParser));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1;
+ } else {
+ ret = -1;
+ }
+
+ ParserMutex.acquire();
+
+ if (!ret) {
+ GenericAsmLexer->setBuffer(buf);
+
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant;
+}
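As a usage sketch for the registry and createInst() path implemented above, the helper below serves bytes to the disassembler from a plain memory buffer through an EDByteReaderCallback and prints the resulting instruction. It is illustrative only; BufferRegion, bufferByteReader and disassembleOne are not names from the sources, and the sketch assumes it is compiled alongside the EDDisassembler.h and EDInst.h headers in this directory.

#include "EDDisassembler.h"
#include "EDInst.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical byte source: a fixed in-memory region starting at Base.
struct BufferRegion {
  const uint8_t *Bytes;
  uint64_t Base;
  uint64_t Size;
};

static int bufferByteReader(uint8_t *byte, uint64_t address, void *arg) {
  const BufferRegion *R = static_cast<const BufferRegion *>(arg);
  if (address < R->Base || address >= R->Base + R->Size)
    return -1;                       // out of range; non-zero signals failure
  *byte = R->Bytes[address - R->Base];
  return 0;
}

void disassembleOne(const uint8_t *bytes, uint64_t size, uint64_t addr) {
  EDDisassembler::initialize();      // one-time back-end and registry setup
  EDDisassembler *dis =
      EDDisassembler::getDisassembler(Triple::x86_64,
                                      EDDisassembler::kEDAssemblySyntaxX86ATT);
  if (!dis)
    return;

  BufferRegion region = { bytes, addr, size };
  EDInst *inst = dis->createInst(bufferByteReader, addr, &region);
  if (!inst)
    return;

  const char *text = 0;
  if (!inst->getString(text))
    errs() << text;                  // stringify() already appends a newline
  delete inst;                       // the EDInst owns the wrapped MCInst
}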
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
new file mode 100644
index 000000000000..11d69c151cf9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
@@ -0,0 +1,269 @@
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+class MCSubtargetInfo;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+ /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+ /// pair
+ struct CPUKey {
+ /// The architecture type
+ llvm::Triple::ArchType Arch;
+
+ /// The assembly syntax
+ AssemblySyntax Syntax;
+
+ /// operator== - Equality operator
+ bool operator==(const CPUKey &key) const {
+ return (Arch == key.Arch &&
+ Syntax == key.Syntax);
+ }
+
+ /// operator< - Less-than operator
+ bool operator<(const CPUKey &key) const {
+ return ((Arch < key.Arch) ||
+ ((Arch == key.Arch) && Syntax < (key.Syntax)));
+ }
+ };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassembler, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ AssemblySyntax syntax);
+
+ /// getDisassembler - Returns the disassembler for a given target triple
+ /// string and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ AssemblySyntax syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been successfully initialized
+ bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
+
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const llvm::EDInstInfo *InstInfos;
+ /// The lexers used when tokenizing instruction strings: a target-independent
+ /// generic lexer and a target-specific one
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler for the given architecture and
+ /// syntax, creating all the objects it needs along the way
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - reports whether the constructor completed successfully.
+ bool valid() {
+ return Valid;
+ }
+
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes regVec and regRMap using the provided register
+ /// info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or 0 if no register with that name is known
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsProgramCounter - reports whether a register ID is an alias for
+ /// the program counter register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
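The register maps documented above (built by initMaps()) can be queried directly. A small illustrative sketch, using register names that the x86 and ARM cases of initMaps() themselves insert; dumpPointerRegs is not a name from the sources:

#include "EDDisassembler.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: report the stack-pointer and program-counter aliases
// known to an already-constructed disassembler.
void dumpPointerRegs(llvm::EDDisassembler &dis) {
  unsigned sp = dis.registerIDWithName("SP");   // 0 when the name is unknown
  unsigned pc = dis.registerIDWithName("PC");
  if (dis.registerIsStackPointer(sp))
    llvm::errs() << "stack pointer: " << dis.nameWithRegisterID(sp) << "\n";
  if (dis.registerIsProgramCounter(pc))
    llvm::errs() << "program counter: " << dis.nameWithRegisterID(pc) << "\n";
}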
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h
new file mode 100644
index 000000000000..e43ad1635246
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h
@@ -0,0 +1,84 @@
+//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINFO_H
+#define LLVM_EDINFO_H
+
+enum {
+ EDIS_MAX_OPERANDS = 13,
+ EDIS_MAX_SYNTAXES = 2
+};
+
+enum OperandTypes {
+ kOperandTypeNone,
+ kOperandTypeImmediate,
+ kOperandTypeRegister,
+ kOperandTypeX86Memory,
+ kOperandTypeX86EffectiveAddress,
+ kOperandTypeX86PCRelative,
+ kOperandTypeARMBranchTarget,
+ kOperandTypeARMSoReg,
+ kOperandTypeARMSoImm,
+ kOperandTypeARMRotImm,
+ kOperandTypeARMSoImm2Part,
+ kOperandTypeARMPredicate,
+ kOperandTypeAddrModeImm12,
+ kOperandTypeLdStSOReg,
+ kOperandTypeARMAddrMode2,
+ kOperandTypeARMAddrMode2Offset,
+ kOperandTypeARMAddrMode3,
+ kOperandTypeARMAddrMode3Offset,
+ kOperandTypeARMAddrMode4,
+ kOperandTypeARMAddrMode5,
+ kOperandTypeARMAddrMode6,
+ kOperandTypeARMAddrMode6Offset,
+ kOperandTypeARMAddrMode7,
+ kOperandTypeARMAddrModePC,
+ kOperandTypeARMRegisterList,
+ kOperandTypeARMDPRRegisterList,
+ kOperandTypeARMSPRRegisterList,
+ kOperandTypeARMTBAddrMode,
+ kOperandTypeThumbITMask,
+ kOperandTypeThumbAddrModeRegS1,
+ kOperandTypeThumbAddrModeRegS2,
+ kOperandTypeThumbAddrModeRegS4,
+ kOperandTypeThumbAddrModeImmS1,
+ kOperandTypeThumbAddrModeImmS2,
+ kOperandTypeThumbAddrModeImmS4,
+ kOperandTypeThumbAddrModeRR,
+ kOperandTypeThumbAddrModeSP,
+ kOperandTypeThumbAddrModePC,
+ kOperandTypeThumb2AddrModeReg,
+ kOperandTypeThumb2SoReg,
+ kOperandTypeThumb2SoImm,
+ kOperandTypeThumb2AddrModeImm8,
+ kOperandTypeThumb2AddrModeImm8Offset,
+ kOperandTypeThumb2AddrModeImm12,
+ kOperandTypeThumb2AddrModeSoReg,
+ kOperandTypeThumb2AddrModeImm8s4,
+ kOperandTypeThumb2AddrModeImm8s4Offset
+};
+
+enum OperandFlags {
+ kOperandFlagSource = 0x1,
+ kOperandFlagTarget = 0x2
+};
+
+enum InstructionTypes {
+ kInstructionTypeNone,
+ kInstructionTypeMove,
+ kInstructionTypeBranch,
+ kInstructionTypePush,
+ kInstructionTypePop,
+ kInstructionTypeCall,
+ kInstructionTypeReturn
+};
+
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
new file mode 100644
index 000000000000..6057e169e347
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
@@ -0,0 +1,212 @@
+//===-- EDInst.cpp - LLVM Enhanced Disassembler ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDInst.h"
+#include "EDDisassembler.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+EDInst::EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *info) :
+ Disassembler(disassembler),
+ Inst(inst),
+ ThisInstInfo(info),
+ ByteSize(byteSize),
+ BranchTarget(-1),
+ MoveSource(-1),
+ MoveTarget(-1) {
+ // ThisInstInfo may be null when no EDInstInfo table is available.
+ OperandOrder = ThisInstInfo ?
+     ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()] : NULL;
+}
+
+EDInst::~EDInst() {
+ unsigned int index;
+ unsigned int numOperands = Operands.size();
+
+ for (index = 0; index < numOperands; ++index)
+ delete Operands[index];
+
+ unsigned int numTokens = Tokens.size();
+
+ for (index = 0; index < numTokens; ++index)
+ delete Tokens[index];
+
+ delete Inst;
+}
+
+uint64_t EDInst::byteSize() {
+ return ByteSize;
+}
+
+int EDInst::stringify() {
+ if (StringifyResult.valid())
+ return StringifyResult.result();
+
+ if (Disassembler.printInst(String, *Inst))
+ return StringifyResult.setResult(-1);
+
+ String.push_back('\n');
+
+ return StringifyResult.setResult(0);
+}
+
+int EDInst::getString(const char*& str) {
+ if (stringify())
+ return -1;
+
+ str = String.c_str();
+
+ return 0;
+}
+
+unsigned EDInst::instID() {
+ return Inst->getOpcode();
+}
+
+bool EDInst::isBranch() {
+ if (ThisInstInfo)
+ return
+ ThisInstInfo->instructionType == kInstructionTypeBranch ||
+ ThisInstInfo->instructionType == kInstructionTypeCall;
+ else
+ return false;
+}
+
+bool EDInst::isMove() {
+ if (ThisInstInfo)
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
+ else
+ return false;
+}
+
+int EDInst::parseOperands() {
+ if (ParseResult.valid())
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ unsigned int mcOpIndex = 0;
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex;
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand);
+ }
+
+ return ParseResult.setResult(0);
+}
+
+int EDInst::branchTargetID() {
+ if (parseOperands())
+ return -1;
+ return BranchTarget;
+}
+
+int EDInst::moveSourceID() {
+ if (parseOperands())
+ return -1;
+ return MoveSource;
+}
+
+int EDInst::moveTargetID() {
+ if (parseOperands())
+ return -1;
+ return MoveTarget;
+}
+
+int EDInst::numOperands() {
+ if (parseOperands())
+ return -1;
+ return Operands.size();
+}
+
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+ if (parseOperands())
+ return -1;
+
+ if (index >= Operands.size())
+ return -1;
+
+ operand = Operands[index];
+ return 0;
+}
+
+int EDInst::tokenize() {
+ if (TokenizeResult.valid())
+ return TokenizeResult.result();
+
+ if (ThisInstInfo == NULL)
+ return TokenizeResult.setResult(-1);
+
+ if (stringify())
+ return TokenizeResult.setResult(-1);
+
+ return TokenizeResult.setResult(EDToken::tokenize(Tokens,
+ String,
+ OperandOrder,
+ Disassembler));
+
+}
+
+int EDInst::numTokens() {
+ if (tokenize())
+ return -1;
+ return Tokens.size();
+}
+
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+ if (tokenize())
+ return -1;
+ if (index >= Tokens.size())
+ return -1;
+ token = Tokens[index];
+ return 0;
+}
+
+#ifdef __BLOCKS__
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+ if (tokenize())
+ return -1;
+
+ tokvec_t::iterator iter;
+
+ for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
+ int ret = visitor(*iter);
+ if (ret == 1)
+ return 0;
+ if (ret != 0)
+ return -1;
+ }
+
+ return 0;
+}
+#endif
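To show how the lazily computed results above are consumed, here is an illustrative walk over an EDInst's operands and tokens; describe() is not a name from the sources, and the sketch assumes it is built next to these files:

#include "EDInst.h"
#include "EDOperand.h"
#include "EDToken.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void describe(EDInst &inst) {
  int numOps = inst.numOperands();      // triggers parseOperands(); -1 on error
  for (int i = 0; i < numOps; ++i) {
    EDOperand *op = 0;
    if (!inst.getOperand(op, i))
      errs() << "operand " << i << (op->isRegister() ? " (register)\n" : "\n");
  }

  int numToks = inst.numTokens();       // triggers tokenize(); -1 on error
  for (int i = 0; i < numToks; ++i) {
    EDToken *tok = 0;
    if (!inst.getToken(tok, i))
      errs() << "token: " << tok->string() << "\n";
  }
}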
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
new file mode 100644
index 000000000000..ceb9505028de
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
@@ -0,0 +1,182 @@
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+ class MCInst;
+ struct EDInstInfo;
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+ /// True if the result has been obtained by executing the function
+ bool Valid;
+ /// The result last obtained from the function
+ int Result;
+
+ /// Constructor - Initializes an invalid result
+ CachedResult() : Valid(false) { }
+ /// valid - Returns true if the result has been obtained by executing the
+ /// function and false otherwise
+ bool valid() { return Valid; }
+ /// result - Returns the result of the function or an undefined value if
+ /// valid() is false
+ int result() { return Result; }
+ /// setResult - Sets the result of the function and declares it valid
+ /// returning the result (so that setResult() can be called from inside a
+ /// return statement)
+ /// @arg result - The result of the function
+ int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const llvm::EDInstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instruction, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when the instruction is destroyed
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instruction
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all tokens are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
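The CachedResult helper above captures a simple memoization idiom: run a slow step once, then replay its status on later calls, exactly as stringify(), parseOperands() and tokenize() do. A minimal self-contained sketch of the same pattern, with hypothetical Analyzer/analyze/analyzeSlowly names:

#include "EDInst.h"   // for llvm::CachedResult

struct Analyzer {
  llvm::CachedResult AnalyzeResult;

  // Stand-in for an expensive step that should only ever run once.
  int analyzeSlowly() { return 0; }

  int analyze() {
    if (AnalyzeResult.valid())                 // later calls are free
      return AnalyzeResult.result();
    return AnalyzeResult.setResult(analyzeSlowly());
  }
};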
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
new file mode 100644
index 000000000000..6a4e56ff72c4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
@@ -0,0 +1,315 @@
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDOperand.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex) :
+ Disassembler(disassembler),
+ Inst(inst),
+ OpIndex(opIndex),
+ MCOpIndex(mcOpIndex) {
+ unsigned int numMCOperands = 0;
+
+ if (Disassembler.Key.Arch == Triple::x86 ||
+ Disassembler.Key.Arch == Triple::x86_64) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ break;
+ case kOperandTypeImmediate:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeRegister:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeX86Memory:
+ numMCOperands = 5;
+ break;
+ case kOperandTypeX86EffectiveAddress:
+ numMCOperands = 4;
+ break;
+ case kOperandTypeX86PCRelative:
+ numMCOperands = 1;
+ break;
+ }
+ }
+ else if (Disassembler.Key.Arch == Triple::arm ||
+ Disassembler.Key.Arch == Triple::thumb) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ case kOperandTypeARMRegisterList:
+ case kOperandTypeARMDPRRegisterList:
+ case kOperandTypeARMSPRRegisterList:
+ break;
+ case kOperandTypeImmediate:
+ case kOperandTypeRegister:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMRotImm:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeARMSoImm2Part:
+ case kOperandTypeARMPredicate:
+ case kOperandTypeThumbITMask:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeARMTBAddrMode:
+ case kOperandTypeThumb2AddrModeImm8s4Offset:
+ case kOperandTypeARMAddrMode7:
+ case kOperandTypeThumb2AddrModeReg:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeThumb2SoReg:
+ case kOperandTypeAddrModeImm12:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeImmS1:
+ case kOperandTypeThumbAddrModeImmS2:
+ case kOperandTypeThumbAddrModeImmS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumbAddrModePC:
+ numMCOperands = 2;
+ break;
+ case kOperandTypeARMSoReg:
+ case kOperandTypeLdStSOReg:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
+ case kOperandTypeARMAddrMode6Offset:
+ numMCOperands = 3;
+ break;
+ case kOperandTypeARMAddrMode6:
+ numMCOperands = 4;
+ break;
+ }
+ }
+
+ mcOpIndex += numMCOperands;
+}
+
+EDOperand::~EDOperand() {
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t ripVal;
+
+ // TODO fix how we do this
+
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
+
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+
+ uint64_t addr = 0;
+
+ unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+
+ if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) {
+ unsigned fsID = Disassembler.registerIDWithName("FS");
+ unsigned gsID = Disassembler.registerIDWithName("GS");
+
+ if (segmentReg == fsID ||
+ segmentReg == gsID) {
+ uint64_t segmentBase;
+ if (!callback(&segmentBase, segmentReg, arg))
+ addr += segmentBase;
+ }
+ }
+
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
+
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
+ }
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ } // switch (operandType)
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+ return -1;
+
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ if (!Inst.Inst->getOperand(MCOpIndex).isReg())
+ return -1;
+
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+ return -1;
+
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
+ }
+ break;
+ }
+
+ return -1;
+}
+
+int EDOperand::isRegister() {
+ return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
+}
+
+unsigned EDOperand::regVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() {
+ return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
+}
+
+uint64_t EDOperand::immediateVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrMode7:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumb2AddrModeReg:
+ return 1;
+ }
+}
+
+#ifdef __BLOCKS__
+namespace {
+ struct RegisterReaderWrapper {
+ EDOperand::EDRegisterBlock_t regBlock;
+ };
+}
+
+static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) {
+ RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg;
+ return wrapper->regBlock(value, regID);
+}
+
+int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) {
+ RegisterReaderWrapper wrapper;
+ wrapper.regBlock = regBlock;
+ return evaluate(result, readerWrapperCallback, (void*)&wrapper);
+}
+#endif
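As a worked example of the evaluate() arithmetic above for an x86 memory operand: with base register RBX = 0x1000, index register RCX = 0x10, a scale of 4, a displacement of 8 and no segment override, the computed address is 0x1000 + 4*0x10 + 8 = 0x1048. A register-reader callback backed by a small map might look like this illustrative sketch; mapRegisterReader and addressOf are not names from the sources:

#include "EDOperand.h"
#include "llvm/Support/DataTypes.h"
#include <map>

static int mapRegisterReader(uint64_t *value, unsigned regID, void *arg) {
  const std::map<unsigned, uint64_t> &regs =
      *static_cast<const std::map<unsigned, uint64_t> *>(arg);
  std::map<unsigned, uint64_t>::const_iterator it = regs.find(regID);
  if (it == regs.end())
    return -1;                 // unreadable register: evaluate() will fail too
  *value = it->second;
  return 0;
}

// Returns the evaluated address, or 0 if the operand could not be evaluated.
uint64_t addressOf(llvm::EDOperand &op, std::map<unsigned, uint64_t> &regs) {
  uint64_t result = 0;
  if (op.evaluate(result, mapRegisterReader, &regs))
    return 0;
  return result;
}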
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
new file mode 100644
index 000000000000..50260ec965a6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
@@ -0,0 +1,91 @@
+//===-- EDOperand.h - LLVM Enhanced Disassembler ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
+
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The index of the operand in the original MCInst
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Computes the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable)
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+ /// isRegister - Returns 1 if the operand is a register or 0 otherwise
+ int isRegister();
+ /// regVal - Returns the register value.
+ unsigned regVal();
+
+ /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
+ int isImmediate();
+ /// immediateVal - Returns the immediate value.
+ uint64_t immediateVal();
+
+ /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
+ int isMemory();
+
+#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp
new file mode 100644
index 000000000000..de770b41ef35
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp
@@ -0,0 +1,210 @@
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDToken.h"
+#include "EDDisassembler.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+EDToken::EDToken(StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler) :
+ Disassembler(disassembler),
+ Str(str),
+ Type(type),
+ LocalType(localType),
+ OperandID(-1) {
+}
+
+EDToken::~EDToken() {
+}
+
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+void EDToken::makeRegister(unsigned registerID) {
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+void EDToken::setOperandID(int operandID) {
+ OperandID = operandID;
+}
+
+enum EDToken::tokenType EDToken::type() const {
+ return Type;
+}
+
+uint64_t EDToken::localType() const {
+ return LocalType;
+}
+
+StringRef EDToken::string() const {
+ return Str;
+}
+
+int EDToken::operandID() const {
+ return OperandID;
+}
+
+int EDToken::literalSign() const {
+ if (Type != kTokenLiteral)
+ return -1;
+ return (LiteralSign ? 1 : 0);
+}
+
+int EDToken::literalAbsoluteValue(uint64_t &value) const {
+ if (Type != kTokenLiteral)
+ return -1;
+ value = LiteralAbsoluteValue;
+ return 0;
+}
+
+int EDToken::registerID(unsigned &registerID) const {
+ if (Type != kTokenRegister)
+ return -1;
+ registerID = RegisterID;
+ return 0;
+}
+
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler) {
+ SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+ SmallVector<AsmToken, 10> asmTokens;
+
+ if (disassembler.parseInst(parsedOperands, asmTokens, str))
+ return -1;
+
+ SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+ unsigned int operandIndex;
+ SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+ operandIterator = parsedOperands.begin();
+ operandIndex = 0;
+
+ bool readOpcode = false;
+
+ const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
+
+ for (tokenIterator = asmTokens.begin();
+ tokenIterator != asmTokens.end();
+ ++tokenIterator) {
+ SMLoc tokenLoc = tokenIterator->getLoc();
+
+ const char *tokenPointer = tokenLoc.getPointer();
+
+ if (tokenPointer > wsPointer) {
+ unsigned long wsLength = tokenPointer - wsPointer;
+
+ EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
+ EDToken::kTokenWhitespace,
+ 0,
+ disassembler);
+
+ tokens.push_back(whitespaceToken);
+ }
+
+ wsPointer = tokenPointer + tokenIterator->getString().size();
+
+ while (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >
+ (*operandIterator)->getEndLoc().getPointer()) {
+ ++operandIterator;
+ ++operandIndex;
+ }
+
+ EDToken *token;
+
+ switch (tokenIterator->getKind()) {
+ case AsmToken::Identifier:
+ if (!readOpcode) {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenOpcode,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ readOpcode = true;
+ break;
+ }
+ // any identifier that isn't an opcode is mere punctuation; so we fall
+ // through
+ default:
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenPunctuation,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ break;
+ case AsmToken::Integer:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ int64_t intVal = tokenIterator->getIntVal();
+
+ if (intVal < 0)
+ token->makeLiteral(true, -intVal);
+ else
+ token->makeLiteral(false, intVal);
+ break;
+ }
+ case AsmToken::Register:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ token->makeRegister((unsigned)tokenIterator->getRegVal());
+ break;
+ }
+ }
+
+ if (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >=
+ (*operandIterator)->getStartLoc().getPointer()) {
+ /// operandIndex == 0 means the operand is the instruction (which the
+ /// AsmParser treats as an operand but edis does not). We therefore skip
+ /// operandIndex == 0 and subtract 1 from all other operand indices.
+
+ if (operandIndex > 0)
+ token->setOperandID(operandOrder[operandIndex - 1]);
+ }
+
+ tokens.push_back(token);
+ }
+
+ // Free any parsed operands.
+ for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
+ delete parsedOperands[i];
+
+ return 0;
+}
+
+int EDToken::getString(const char*& buf) {
+ if (PermStr.length() == 0) {
+ PermStr = Str.str();
+ }
+ buf = PermStr.c_str();
+ return 0;
+}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
new file mode 100644
index 000000000000..ba467078686a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
@@ -0,0 +1,139 @@
+//===-- EDToken.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct EDDisassembler;
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+ /// The operand corresponding to the token, or -1 if the token is not
+ /// part of an operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (1 if negative, 0 otherwise)
+ bool LiteralSign;
+ /// The absolute value if the token is a literal
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+ /// makeLiteral - Adds the information specific to a literal
+ /// @arg sign - The sign of the literal (1 if negative, 0
+ /// otherwise)
+ ///
+ /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned &registerID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as output by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+ /// assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Sets a character pointer to point at the token's string,
+ /// returning 0 on success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
new file mode 100644
index 000000000000..ad86db13d510
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -0,0 +1,1086 @@
+//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+// Given a special op, return the address skip amount (in units of
+ // DWARF2_LINE_MIN_INSN_LENGTH).
+#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
+
+// The maximum address skip amount that can be encoded with a special op.
+#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
+
+// First special line opcode - leave room for the standard opcodes.
+// Note: If you want to change this, you'll have to update the
+// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
+#define DWARF2_LINE_OPCODE_BASE 13
+
+// Minimum line offset in a special line info. opcode. This value
+// was chosen to give a reasonable range of values.
+#define DWARF2_LINE_BASE -5
+
+// Range of line offsets in a special line info. opcode.
+#define DWARF2_LINE_RANGE 14
+
+// Define the architecture-dependent minimum instruction length (in bytes).
+ // It is better for this value to be too small than too big.
+#define DWARF2_LINE_MIN_INSN_LENGTH 1
+
+ // Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 (which is the current setting),
+// this routine is a nop and will be optimized away.
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) {
+ if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
+ return AddrDelta;
+ if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
+ // TODO: report this error, but really only once.
+ ;
+ }
+ return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH;
+}
+
+//
+ // This is called when an instruction is assembled into the specified section.
+ // If there is information from the last .loc directive that has not yet had a
+ // line entry made for it, one is made here.
+//
+void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+ if (!MCOS->getContext().getDwarfLocSeen())
+ return;
+
+ // Create a symbol in the current section for use in the line entry.
+ MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ MCOS->EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+ // Clear DwarfLocSeen to indicate that the current .loc info has now been used.
+ MCOS->getContext().ClearDwarfLocSeen();
+
+ // Get the MCLineSection for this section; if one does not exist for this
+ // section, create it.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections.lookup(Section);
+ if (!LineSection) {
+ // Create a new MCLineSection. It will be deleted in MCDwarfFileTable::Emit()
+ // after the dwarf line table has been created from it by iterating through
+ // the MCLineSections DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCOS->getContext().addMCLineSection(Section, LineSection);
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry);
+}
+
+//
+ // This helper routine returns an expression of End - Start - IntVal.
+//
+static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
+ const MCSymbol &Start,
+ const MCSymbol &End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, MCOS.getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+ return Res3;
+}
+
+//
+// This emits the Dwarf line table for the specified section from the entries
+// in the LineSection.
+//
+static inline void EmitDwarfLineTable(MCStreamer *MCOS,
+ const MCSection *Section,
+ const MCLineSection *LineSection) {
+ unsigned FileNum = 1;
+ unsigned LastLine = 1;
+ unsigned Column = 0;
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ MCSymbol *LastLabel = NULL;
+
+ // Loop through each MCLineEntry and encode the dwarf line number table.
+ for (MCLineSection::const_iterator
+ it = LineSection->getMCLineEntries()->begin(),
+ ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) {
+
+ if (FileNum != it->getFileNum()) {
+ FileNum = it->getFileNum();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
+ MCOS->EmitULEB128IntValue(FileNum);
+ }
+ if (Column != it->getColumn()) {
+ Column = it->getColumn();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
+ MCOS->EmitULEB128IntValue(Column);
+ }
+ if (Isa != it->getIsa()) {
+ Isa = it->getIsa();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
+ MCOS->EmitULEB128IntValue(Isa);
+ }
+ if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
+ Flags = it->getFlags();
+ MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
+ }
+ if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
+ if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
+ if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+ int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
+ MCSymbol *Label = it->getLabel();
+
+ // At this point we want to emit/create the sequence to encode the delta in
+ // line numbers and the increment of the address between the previous Label
+ // and the current Label.
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ asmInfo.getPointerSize());
+
+ LastLine = it->getLine();
+ LastLabel = Label;
+ }
+
+ // Emit a DW_LNE_end_sequence for the end of the section.
+ // Using the Section pointer, create a temporary label at the end of the
+ // section, and use that label together with LastLabel to compute the address
+ // delta. Use INT64_MAX as the line delta, which is the signal that this is
+ // actually a DW_LNE_end_sequence.
+
+ // Switch to the section to be able to create a symbol at its end.
+ MCOS->SwitchSection(Section);
+
+ MCContext &context = MCOS->getContext();
+ // Create a symbol at the end of the section.
+ MCSymbol *SectionEnd = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the end of the section.
+ MCOS->EmitLabel(SectionEnd);
+
+ // Switch back to the dwarf line section.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
+ asmInfo.getPointerSize());
+}
+
+//
+// This emits the Dwarf file and the line tables.
+//
+void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ // Switch to the section where the table will be emitted into.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ // Create a symbol at the beginning of this section.
+ MCSymbol *LineStartSym = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the section.
+ MCOS->EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = context.CreateTempSymbol();
+
+ // The first 4 bytes are the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
+ 4);
+
+ // The next 2 bytes are the version, which is Dwarf 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+
+ // The next 4 bytes are the length of the prologue: the distance from the
+ // start of the section to the end of the prologue, not including the 4 bytes
+ // for the total length, the 2 bytes for the version, or these 4 bytes for
+ // the length of the prologue itself.
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
+ (4 + 2 + 4)),
+ 4, 0);
+
+ // The parameters of the state machine are next.
+ MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
+
+ // Standard opcode lengths
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ // the Directory num
+ MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
+ MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
+ MCOS->EmitIntValue(0, 1); // filesize (always 0)
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ MCOS->EmitLabel(ProEndSym);
+
+ // Put out the line tables.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ const std::vector<const MCSection *> &MCLineSectionOrder =
+ MCOS->getContext().getMCLineSectionOrder();
+ for (std::vector<const MCSection*>::const_iterator it =
+ MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+ ++it) {
+ const MCSection *Sec = *it;
+ const MCLineSection *Line = MCLineSections.lookup(Sec);
+ EmitDwarfLineTable(MCOS, Sec, Line);
+
+ // Now delete the MCLineSections that were created in MCLineEntry::Make()
+ // and used to emit the line table.
+ delete Line;
+ }
+
+ if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
+ && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
+ // The darwin9 linker has a bug (see PR8715). For 32-bit architectures
+ // it requires:
+ // total_length >= prologue_length + 10
+ // We are 4 bytes short, since we have total_length = 51 and
+ // prologue_length = 45
+
+ // The regular end_sequence should be sufficient.
+ MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
+ }
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ MCOS->EmitLabel(LineEndSym);
+}
+
+/// Utility function to write the encoding to an object writer.
+void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ OW->WriteBytes(OS.str());
+}
+
+/// Utility function to emit the encoding to a streamer.
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+ /// Utility function to encode a Dwarf LineDelta/AddrDelta pair.
+void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS) {
+ uint64_t Temp, Opcode;
+ bool NeedCopy = false;
+
+ // Scale the address delta by the minimum instruction length.
+ AddrDelta = ScaleAddrDelta(AddrDelta);
+
+ // A LineDelta of INT64_MAX is a signal that this is actually a
+ // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
+ // end_sequence to emit the matrix entry.
+ if (LineDelta == INT64_MAX) {
+ if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ else {
+ OS << char(dwarf::DW_LNS_advance_pc);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OS);
+ }
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ return;
+ }
+
+ // Bias the line delta by the base.
+ Temp = LineDelta - DWARF2_LINE_BASE;
+
+ // If the line increment is out of range of a special opcode, we must encode
+ // it with DW_LNS_advance_line.
+ if (Temp >= DWARF2_LINE_RANGE) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
+ OS << OSE.str();
+
+ LineDelta = 0;
+ Temp = 0 - DWARF2_LINE_BASE;
+ NeedCopy = true;
+ }
+
+ // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
+ if (LineDelta == 0 && AddrDelta == 0) {
+ OS << char(dwarf::DW_LNS_copy);
+ return;
+ }
+
+ // Bias the opcode by the special opcode base.
+ Temp += DWARF2_LINE_OPCODE_BASE;
+
+ // Avoid overflow when addr_delta is large.
+ if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+ // Try using a special opcode.
+ Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(Opcode);
+ return;
+ }
+
+ // Try using DW_LNS_const_add_pc followed by special op.
+ Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ OS << char(Opcode);
+ return;
+ }
+ }
+
+ // Otherwise use DW_LNS_advance_pc.
+ OS << char(dwarf::DW_LNS_advance_pc);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+ OS << OSE.str();
+
+ if (NeedCopy)
+ OS << char(dwarf::DW_LNS_copy);
+ else
+ OS << char(Temp);
+}
+
+void MCDwarfFile::print(raw_ostream &OS) const {
+ OS << '"' << getName() << '"';
+}
+
+void MCDwarfFile::dump() const {
+ print(dbgs());
+}
+
+static int getDataAlignmentFactor(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int size = asmInfo.getPointerSize();
+ if (asmInfo.isStackGrowthDirectionUp())
+ return size;
+ else
+ return -size;
+}
+
+static unsigned getSizeForEncoding(MCStreamer &streamer,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ unsigned format = symbolEncoding & 0x0f;
+ switch (format) {
+ default:
+ assert(0 && "Unknown Encoding");
+ case dwarf::DW_EH_PE_absptr:
+ case dwarf::DW_EH_PE_signed:
+ return context.getAsmInfo().getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ case dwarf::DW_EH_PE_sdata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ case dwarf::DW_EH_PE_sdata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ case dwarf::DW_EH_PE_sdata8:
+ return 8;
+ }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, const char *comment = 0) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
+ streamer.EmitAbsValue(v, size);
+}
+
+static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol,
+ symbolEncoding,
+ streamer);
+ unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ streamer.EmitValue(v, size);
+}
+
+static const MachineLocation TranslateMachineLocation(
+ const TargetAsmInfo &TAI,
+ const MachineLocation &Loc) {
+ unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+ MachineLocation::VirtualFP :
+ unsigned(TAI.getDwarfRegNum(Loc.getReg(), true));
+ const MachineLocation &NewLoc = Loc.isReg() ?
+ MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+ return NewLoc;
+}
+
+namespace {
+ class FrameEmitterImpl {
+ int CFAOffset;
+ int CIENum;
+ bool UsingCFI;
+ bool IsEH;
+ const MCSymbol *SectionStart;
+ public:
+ FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) :
+ CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
+ SectionStart(sectionStart) {
+ }
+
+ /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+ /// we're successful, return 'true'. Otherwise, return 'false' and the caller
+ /// will emit the normal CIE and FDE.
+ bool EmitCompactUnwind(MCStreamer &streamer,
+ const MCDwarfFrameInfo &frame);
+
+ const MCSymbol &EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ unsigned lsdaEncoding);
+ MCSymbol *EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame);
+ void EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel);
+ void EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr);
+ };
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+ StringRef Prefix) {
+ if (Streamer.isVerboseAsm()) {
+ const char *EncStr = 0;
+ switch (Encoding) {
+ default: EncStr = "<unknown encoding>";
+ case dwarf::DW_EH_PE_absptr: EncStr = "absptr";
+ case dwarf::DW_EH_PE_omit: EncStr = "omit";
+ case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel";
+ case dwarf::DW_EH_PE_udata4: EncStr = "udata4";
+ case dwarf::DW_EH_PE_udata8: EncStr = "udata8";
+ case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4";
+ case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+ EncStr = "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+ EncStr = "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+ EncStr = "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+ EncStr = "indirect pcrel sdata8";
+ }
+
+ Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+ }
+
+ Streamer.EmitIntValue(Encoding, 1);
+}
+
+void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr) {
+ int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ switch (Instr.getOperation()) {
+ case MCCFIInstruction::Move:
+ case MCCFIInstruction::RelMove: {
+ const MachineLocation &Dst = Instr.getDestination();
+ const MachineLocation &Src = Instr.getSource();
+ const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove;
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
+ } else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
+ Twine(Src.getReg()));
+ Streamer.EmitULEB128IntValue(Src.getReg());
+ }
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset = -Src.getOffset();
+
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+ return;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
+ Streamer.EmitULEB128IntValue(Dst.getReg());
+ return;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset = Offset / dataAlignmentFactor;
+
+ if (Offset < 0) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitSLEB128IntValue(Offset);
+ } else if (Reg < 64) {
+ if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
+ Twine(Reg) + ")");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ } else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
+ Streamer.EmitULEB128IntValue(Offset);
+ }
+ return;
+ }
+ case MCCFIInstruction::Remember:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
+ return;
+ case MCCFIInstruction::Restore:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
+ return;
+ case MCCFIInstruction::SameValue: {
+ unsigned Reg = Instr.getDestination().getReg();
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
+ Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ }
+ llvm_unreachable("Unhandled case in switch");
+}
+
+ /// EmitCFIInstructions - Emit frame instructions to describe the layout of
+ /// the frame.
+void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel) {
+ for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
+ const MCCFIInstruction &Instr = Instrs[i];
+ MCSymbol *Label = Instr.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ // Advance row if new location.
+ if (BaseLabel && Label) {
+ MCSymbol *ThisSym = Label;
+ if (ThisSym != BaseLabel) {
+ if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
+ streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ BaseLabel = ThisSym;
+ }
+ }
+
+ EmitCFIInstruction(streamer, Instr);
+ }
+}
+
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+ /// successful, return 'true'. Otherwise, return 'false' and the caller will
+ /// emit the normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+ const MCDwarfFrameInfo &Frame) {
+#if 1
+ return false;
+#else
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ // range-start range-length compact-unwind-enc personality-func lsda
+ // _foo LfooEnd-_foo 0x00000023 0 0
+ // _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1
+ //
+ // .section __LD,__compact_unwind,regular,debug
+ //
+ // # compact unwind for _foo
+ // .quad _foo
+ // .set L1,LfooEnd-_foo
+ // .long L1
+ // .long 0x01010001
+ // .quad 0
+ // .quad 0
+ //
+ // # compact unwind for _bar
+ // .quad _bar
+ // .set L2,LbarEnd-_bar
+ // .long L2
+ // .long 0x01020011
+ // .quad __gxx_personality
+ // .quad except_tab1
+
+ uint32_t Encoding =
+ TAI.getCompactUnwindEncoding(Frame.Instructions,
+ getDataAlignmentFactor(Streamer), IsEH);
+ if (!Encoding) return false;
+
+ // The encoding needs to know we have an LSDA.
+ if (Frame.Lsda)
+ Encoding |= 0x40000000;
+
+ Streamer.SwitchSection(TAI.getCompactUnwindSection());
+
+ // Range Start
+ unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+ if (VerboseAsm) Streamer.AddComment("Range Start");
+ Streamer.EmitSymbolValue(Frame.Function, Size);
+
+ // Range Length
+ const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+ *Frame.End, 0);
+ if (VerboseAsm) Streamer.AddComment("Range Length");
+ Streamer.EmitAbsValue(Range, 4);
+
+ // Compact Encoding
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
+ if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
+ Twine(llvm::utohexstr(Encoding)));
+ Streamer.EmitIntValue(Encoding, Size);
+
+ // Personality Function
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
+ if (VerboseAsm) Streamer.AddComment("Personality Function");
+ if (Frame.Personality)
+ Streamer.EmitSymbolValue(Frame.Personality, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No personality fn
+
+ // LSDA
+ Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
+ if (VerboseAsm) Streamer.AddComment("LSDA");
+ if (Frame.Lsda)
+ Streamer.EmitSymbolValue(Frame.Lsda, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No LSDA
+
+ return true;
+#endif
+}
+
+const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ unsigned lsdaEncoding) {
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ MCSymbol *sectionStart;
+ if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH)
+ sectionStart = context.CreateTempSymbol();
+ else
+ sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
+
+ streamer.EmitLabel(sectionStart);
+ CIENum++;
+
+ MCSymbol *sectionEnd = context.CreateTempSymbol();
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
+ *sectionEnd, 4);
+ if (verboseAsm) streamer.AddComment("CIE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ // CIE ID
+ unsigned CIE_ID = IsEH ? 0 : -1;
+ if (verboseAsm) streamer.AddComment("CIE ID Tag");
+ streamer.EmitIntValue(CIE_ID, 4);
+
+ // Version
+ if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
+ streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+
+ // Augmentation String
+ SmallString<8> Augmentation;
+ if (IsEH) {
+ if (verboseAsm) streamer.AddComment("CIE Augmentation");
+ Augmentation += "z";
+ if (personality)
+ Augmentation += "P";
+ if (lsda)
+ Augmentation += "L";
+ Augmentation += "R";
+ streamer.EmitBytes(Augmentation.str(), 0);
+ }
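+ // Null terminator for the augmentation string (an empty string when not EH).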
+ streamer.EmitIntValue(0, 1);
+
+ // Code Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
+ streamer.EmitULEB128IntValue(1);
+
+ // Data Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
+ streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+
+ // Return Address Register
+ if (verboseAsm) streamer.AddComment("CIE Return Address Column");
+ streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true));
+
+ // Augmentation Data Length (optional)
+
+ unsigned augmentationLength = 0;
+ if (IsEH) {
+ if (personality) {
+ // Personality Encoding
+ augmentationLength += 1;
+ // Personality
+ augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+ }
+ if (lsda)
+ augmentationLength += 1;
+ // Encoding of the FDE pointers
+ augmentationLength += 1;
+
+ if (verboseAsm) streamer.AddComment("Augmentation Size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data (optional)
+ if (personality) {
+ // Personality Encoding
+ EmitEncodingByte(streamer, personalityEncoding,
+ "Personality Encoding");
+ // Personality
+ if (verboseAsm) streamer.AddComment("Personality");
+ EmitPersonality(streamer, *personality, personalityEncoding);
+ }
+
+ if (lsda)
+ EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+
+ // Encoding of the FDE pointers
+ EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI),
+ "FDE Encoding");
+ }
+
+ // Initial Instructions
+
+ const std::vector<MachineMove> &Moves = TAI.getInitialFrameState();
+ std::vector<MCCFIInstruction> Instructions;
+
+ for (int i = 0, n = Moves.size(); i != n; ++i) {
+ MCSymbol *Label = Moves[i].getLabel();
+ const MachineLocation &Dst =
+ TranslateMachineLocation(TAI, Moves[i].getDestination());
+ const MachineLocation &Src =
+ TranslateMachineLocation(TAI, Moves[i].getSource());
+ MCCFIInstruction Inst(Label, Dst, Src);
+ Instructions.push_back(Inst);
+ }
+
+ EmitCFIInstructions(streamer, Instructions, NULL);
+
+ // Padding
+ streamer.EmitValueToAlignment(IsEH
+ ? 4 : context.getAsmInfo().getPointerSize());
+
+ streamer.EmitLabel(sectionEnd);
+ return *sectionStart;
+}
+
+MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame) {
+ MCContext &context = streamer.getContext();
+ MCSymbol *fdeStart = context.CreateTempSymbol();
+ MCSymbol *fdeEnd = context.CreateTempSymbol();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
+
+ if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) {
+ MCSymbol *EHSym =
+ context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
+ streamer.EmitEHSymAttributes(frame.Function, EHSym);
+ streamer.EmitLabel(EHSym);
+ }
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+ if (verboseAsm) streamer.AddComment("FDE Length");
+ streamer.EmitAbsValue(Length, 4);
+
+ streamer.EmitLabel(fdeStart);
+
+ // CIE Pointer
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ if (IsEH) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
+ 0);
+ if (verboseAsm) streamer.AddComment("FDE CIE Offset");
+ streamer.EmitAbsValue(offset, 4);
+ } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) {
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
+ cieStart, 0);
+ streamer.EmitAbsValue(offset, 4);
+ } else {
+ streamer.EmitSymbolValue(&cieStart, 4);
+ }
+
+ unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned size = getSizeForEncoding(streamer, fdeEncoding);
+
+ // PC Begin
+ unsigned PCBeginEncoding = IsEH ? fdeEncoding :
+ (unsigned)dwarf::DW_EH_PE_absptr;
+ unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding);
+ EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location");
+
+ // PC Range
+ const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
+ *frame.End, 0);
+ if (verboseAsm) streamer.AddComment("FDE address range");
+ streamer.EmitAbsValue(Range, size);
+
+ if (IsEH) {
+ // Augmentation Data Length
+ unsigned augmentationLength = 0;
+
+ if (frame.Lsda)
+ augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+
+ if (verboseAsm) streamer.AddComment("Augmentation size");
+ streamer.EmitULEB128IntValue(augmentationLength);
+
+ // Augmentation Data
+ if (frame.Lsda)
+ EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
+ "Language Specific Data Area");
+ }
+
+ // Call Frame Instructions
+
+ EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+
+ // Padding
+ streamer.EmitValueToAlignment(PCBeginSize);
+
+ return fdeEnd;
+}
+
+namespace {
+ struct CIEKey {
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+
+ CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
+ unsigned LsdaEncoding_) : Personality(Personality_),
+ PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_) {
+ }
+ const MCSymbol* Personality;
+ unsigned PersonalityEncoding;
+ unsigned LsdaEncoding;
+ };
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<CIEKey> {
+ static CIEKey getEmptyKey() {
+ return CIEKey::getEmptyKey();
+ }
+ static CIEKey getTombstoneKey() {
+ return CIEKey::getTombstoneKey();
+ }
+ static unsigned getHashValue(const CIEKey &Key) {
+ FoldingSetNodeID ID;
+ ID.AddPointer(Key.Personality);
+ ID.AddInteger(Key.PersonalityEncoding);
+ ID.AddInteger(Key.LsdaEncoding);
+ return ID.ComputeHash();
+ }
+ static bool isEqual(const CIEKey &LHS,
+ const CIEKey &RHS) {
+ return LHS.Personality == RHS.Personality &&
+ LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+ LHS.LsdaEncoding == RHS.LsdaEncoding;
+ }
+ };
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
+ bool UsingCFI,
+ bool IsEH) {
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ const MCSection &Section = IsEH ? *TAI.getEHFrameSection() :
+ *TAI.getDwarfFrameSection();
+ Streamer.SwitchSection(&Section);
+ MCSymbol *SectionStart = Context.CreateTempSymbol();
+ Streamer.EmitLabel(SectionStart);
+
+ MCSymbol *FDEEnd = NULL;
+ DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+ FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart);
+
+ const MCSymbol *DummyDebugKey = NULL;
+ for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (IsEH && TAI.getCompactUnwindSection() &&
+ Emitter.EmitCompactUnwind(Streamer, Frame)) {
+ FDEEnd = NULL;
+ continue;
+ }
+
+ CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
+ Frame.LsdaEncoding);
+ const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
+ if (!CIEStart)
+ CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
+ Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.LsdaEncoding);
+
+ FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+
+ if (i != n - 1)
+ Streamer.EmitLabel(FDEEnd);
+ }
+
+ Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+ if (FDEEnd)
+ Streamer.EmitLabel(FDEEnd);
+}
+
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+ Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
+ raw_ostream &OS) {
+ // FIXME: Assumes the code alignment factor is 1.
+ if (AddrDelta == 0) {
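+   // A zero address delta requires no advance opcode; emit nothing.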
+ } else if (isUIntN(6, AddrDelta)) {
+ uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
+ OS << Opcode;
+ } else if (isUInt<8>(AddrDelta)) {
+ OS << uint8_t(dwarf::DW_CFA_advance_loc1);
+ OS << uint8_t(AddrDelta);
+ } else if (isUInt<16>(AddrDelta)) {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ OS << uint8_t(dwarf::DW_CFA_advance_loc2);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ } else {
+ // FIXME: check what is the correct behavior on a big endian machine.
+ assert(isUInt<32>(AddrDelta));
+ OS << uint8_t(dwarf::DW_CFA_advance_loc4);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ OS << uint8_t((AddrDelta >> 16) & 0xff);
+ OS << uint8_t((AddrDelta >> 24) & 0xff);
+
+ }
+}
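For reference, the special-opcode arithmetic used by MCDwarfLineAddr::Encode above can be reproduced in a standalone sketch. It mirrors the constants defined at the top of MCDwarf.cpp (opcode base 13, line base -5, line range 14); the helper name and the sample deltas are illustrative only, not part of LLVM.

    #include <cstdint>
    #include <cstdio>

    // Constants mirroring the #defines at the top of MCDwarf.cpp.
    static const int      LineOpcodeBase = 13;  // DWARF2_LINE_OPCODE_BASE
    static const int      LineBase       = -5;  // DWARF2_LINE_BASE
    static const unsigned LineRange      = 14;  // DWARF2_LINE_RANGE

    // Returns the one-byte special opcode for (LineDelta, AddrDelta), or -1 when
    // the pair is out of range and Encode() falls back to standard DW_LNS_* opcodes.
    static int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
      int64_t AdjustedLine = LineDelta - LineBase;        // bias by the line base
      if (AdjustedLine < 0 || AdjustedLine >= (int64_t)LineRange)
        return -1;                                        // needs DW_LNS_advance_line
      uint64_t Opcode = LineOpcodeBase + AdjustedLine + AddrDelta * LineRange;
      return Opcode <= 255 ? (int)Opcode : -1;            // must fit in one byte
    }

    int main() {
      std::printf("%d\n", specialOpcode(1, 2));   // 13 + (1 - (-5)) + 2*14 = 47
      std::printf("%d\n", specialOpcode(1, 100)); // too large: prints -1
      return 0;
    }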
diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp
new file mode 100644
index 000000000000..2c3f8e8f7866
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELF.cpp
@@ -0,0 +1,72 @@
+//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+namespace llvm {
+
+void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+ SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+unsigned MCELF::GetBinding(const MCSymbolData &SD) {
+ uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ return Binding;
+}
+
+void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+ SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+unsigned MCELF::GetType(const MCSymbolData &SD) {
+ uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+ return Type;
+}
+
+void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0x3 << ELF_STV_Shift);
+ SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
+unsigned MCELF::GetVisibility(MCSymbolData &SD) {
+ unsigned Visibility =
+ (SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ return Visibility;
+}
+
+}
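The MCELF helpers above all use the same clear-then-insert bit-field pattern on MCSymbolData's flags word. The standalone sketch below shows only that pattern; the shift values are placeholders, since the real ELF_STT/STB/STV shifts live in MCELFSymbolFlags.h, which is not part of this diff.

    #include <cassert>
    #include <cstdint>

    // Placeholder shift values; the real constants are in MCELFSymbolFlags.h.
    static const unsigned TypeShift       = 0;  // holds an ELF::STT_* value
    static const unsigned BindingShift    = 4;  // holds an ELF::STB_* value
    static const unsigned VisibilityShift = 8;  // holds an ELF::STV_* value

    // Clear the field, then OR in the new value, as MCELF::Set* does.
    static uint32_t setField(uint32_t Flags, unsigned Shift, uint32_t Mask,
                             uint32_t Value) {
      return (Flags & ~(Mask << Shift)) | (Value << Shift);
    }

    // Shift down and mask, as the MCELF::Get* accessors do.
    static uint32_t getField(uint32_t Flags, unsigned Shift, uint32_t Mask) {
      return (Flags >> Shift) & Mask;
    }

    int main() {
      uint32_t Flags = 0;
      Flags = setField(Flags, BindingShift, 0xf, 2 /* ELF::STB_WEAK */);
      Flags = setField(Flags, TypeShift, 0xf, 1 /* ELF::STT_OBJECT */);
      assert(getField(Flags, BindingShift, 0xf) == 2);
      assert(getField(Flags, TypeShift, 0xf) == 1);
      (void)VisibilityShift; // the visibility field (mask 0x3) works the same way
      return 0;
    }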
diff --git a/contrib/llvm/lib/MC/MCELF.h b/contrib/llvm/lib/MC/MCELF.h
new file mode 100644
index 000000000000..e08f1e65429a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELF.h
@@ -0,0 +1,35 @@
+//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some support functions used by the ELF Streamer and
+// ObjectWriter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELF_H
+#define LLVM_MC_MCELF_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCSymbolData;
+
+class MCELF {
+ public:
+ static void SetBinding(MCSymbolData &SD, unsigned Binding);
+ static unsigned GetBinding(const MCSymbolData &SD);
+ static void SetType(MCSymbolData &SD, unsigned Type);
+ static unsigned GetType(const MCSymbolData &SD);
+ static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
+ static unsigned GetVisibility(MCSymbolData &SD);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..12a02a9e9740
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -0,0 +1,23 @@
+//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
+ Triple::OSType OSType_,
+ uint16_t EMachine_,
+ bool HasRelocationAddend_)
+ : OSType(OSType_), EMachine(EMachine_),
+ HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
+}
+
+MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
new file mode 100644
index 000000000000..49340edbed5e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -0,0 +1,386 @@
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCELFStreamer.h"
+#include "MCELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+void MCELFStreamer::InitSections() {
+ // This emulates the behavior of GNU as. It makes the output easier to
+ // compare, since the major sections are emitted in the same order.
+ SetSectionText();
+ SetSectionData();
+ SetSectionBss();
+ SetSectionText();
+}
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol->getSection());
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ if (Section.getFlags() & ELF::SHF_TLS)
+ MCELF::SetType(SD, ELF::STT_TLS);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ assert(0 && "invalid assembler flag!");
+}
+
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+}
+
+void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCELFStreamer::ChangeSection(const MCSection *Section) {
+ const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+ if (Grp)
+ getAssembler().getOrCreateSymbolData(*Grp);
+ this->MCObjectStreamer::ChangeSection(Section);
+}
+
+void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias);
+ AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext());
+ Alias->setVariableValue(Value);
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to be desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ case MCSA_SymbolResolver:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_IndirectSymbol:
+ assert(0 && "Invalid symbol attribute for ELF!");
+ break;
+
+ case MCSA_ELF_TypeGnuUniqueObject:
+ // Ignore for now.
+ break;
+
+ case MCSA_Global:
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_WeakReference:
+ case MCSA_Weak:
+ MCELF::SetBinding(SD, ELF::STB_WEAK);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_Local:
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
+ case MCSA_ELF_TypeFunction:
+ MCELF::SetType(SD, ELF::STT_FUNC);
+ break;
+
+ case MCSA_ELF_TypeObject:
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+ break;
+
+ case MCSA_ELF_TypeTLS:
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+
+ case MCSA_ELF_TypeCommon:
+ MCELF::SetType(SD, ELF::STT_COMMON);
+ break;
+
+ case MCSA_ELF_TypeNoType:
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ break;
+
+ case MCSA_Protected:
+ MCELF::SetVisibility(SD, ELF::STV_PROTECTED);
+ break;
+
+ case MCSA_Hidden:
+ MCELF::SetVisibility(SD, ELF::STV_HIDDEN);
+ break;
+
+ case MCSA_Internal:
+ MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
+ break;
+ }
+}
+
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ if (!BindingExplicitlySet.count(Symbol)) {
+ MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ }
+
+ MCELF::SetType(SD, ELF::STT_OBJECT);
+
+ if (MCELF::GetBinding(SD) == ELF::STB_LOCAL) {
+ const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+ Symbol->setSection(*Section);
+
+ struct LocalCommon L = {&SD, Size, ByteAlignment};
+ LocalCommons.push_back(L);
+ } else {
+ SD.setCommon(Size, ByteAlignment);
+ }
+
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+}
+
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ // FIXME: Should this be caught and done earlier?
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ // FIXME: ByteAlignment is not meaningful here, but EmitCommonSymbol requires a value.
+ EmitCommonSymbol(Symbol, Size, 1);
+}
+
+void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+// Add a symbol for the file name of this module. This is the second
+// entry in the module's symbol table (the first being the null symbol).
+void MCELFStreamer::EmitFileDirective(StringRef Filename) {
+ MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
+ Symbol->setSection(*getCurrentSection());
+ Symbol->setAbsolute();
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+}
+
+void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_TLSLD:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ break;
+ }
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ this->MCObjectStreamer::EmitInstToFragment(Inst);
+ MCInstFragment &F = *cast<MCInstFragment>(getCurrentFragment());
+
+ for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
+ fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
+}
+
+void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ fixSymbolsInTLSFixups(Fixups[i].getValue());
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCELFStreamer::Finish() {
+ EmitFrames(true);
+
+ for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
+ e = LocalCommons.end();
+ i != e; ++i) {
+ MCSymbolData *SD = i->SD;
+ uint64_t Size = i->Size;
+ unsigned ByteAlignment = i->ByteAlignment;
+ const MCSymbol &Symbol = SD->getSymbol();
+ const MCSection &Section = Symbol.getSection();
+
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
+ new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD->setFragment(F);
+
+ // Update the maximum alignment of the section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+ }
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack) {
+ MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+}
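In MCELFStreamer::Finish above, each local common symbol is laid out in .bss with an MCAlignFragment followed by an MCFillFragment. The self-contained sketch below shows only the equivalent offset arithmetic (round up to the alignment, then reserve the size); the sample sizes and alignments are illustrative, not LLVM API.

    #include <cstdint>
    #include <cstdio>

    // Round Offset up to the next multiple of Align (what MCAlignFragment achieves).
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      // Hypothetical local common symbols: {Size, ByteAlignment}.
      struct Common { uint64_t Size, Align; } LocalCommons[] = {{3, 1}, {8, 8}, {2, 4}};
      uint64_t BssSize = 0;
      for (const Common &LC : LocalCommons) {
        uint64_t SymbolOffset = alignTo(BssSize, LC.Align); // align fragment
        BssSize = SymbolOffset + LC.Size;                   // fill fragment reserves Size
        std::printf("offset %llu size %llu\n",
                    (unsigned long long)SymbolOffset, (unsigned long long)LC.Size);
      }
      std::printf("total .bss size %llu\n", (unsigned long long)BssSize);
      return 0;
    }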
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.h b/contrib/llvm/lib/MC/MCELFStreamer.h
new file mode 100644
index 000000000000..855e7e9ca60f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFStreamer.h
@@ -0,0 +1,140 @@
+//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+class MCELFStreamer : public MCObjectStreamer {
+public:
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler)
+ : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+
+ virtual void Finish();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+ /// @}
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+ }
+
+ void SetSectionData() {
+ SetSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionText() {
+ SetSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionBss() {
+ SetSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+};
+
+} // end llvm namespace
+
+#endif
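
The private SetSectionData/SetSectionText/SetSectionBss helpers above all reduce to getELFSection() plus SwitchSection(); the same pattern works from client code for any other ELF section. A small sketch, assuming SectionKind::getReadOnly() and llvm/Support/ELF.h are available in this tree:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionELF.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/ELF.h"
    using namespace llvm;

    // Switch an object streamer to .rodata, mirroring SetSectionData() above.
    static void switchToRodata(MCStreamer &S) {
      S.SwitchSection(S.getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
                                                   ELF::SHF_ALLOC,
                                                   SectionKind::getReadOnly()));
    }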
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
new file mode 100644
index 000000000000..fcf1aabb5a86
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -0,0 +1,605 @@
+//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcexpr"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+namespace {
+namespace stats {
+STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
+}
+}
+
+void MCExpr::print(raw_ostream &OS) const {
+ switch (getKind()) {
+ case MCExpr::Target:
+ return cast<MCTargetExpr>(this)->PrintImpl(OS);
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(*this).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
+ const MCSymbol &Sym = SRE.getSymbol();
+ // Parenthesize names that start with $ so that they don't look like
+ // absolute names.
+ bool UseParens = Sym.getName()[0] == '$';
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) {
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ UseParens = true;
+ }
+
+ if (UseParens)
+ OS << '(' << Sym << ')';
+ else
+ OS << Sym;
+
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_LO16)
+ OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
+ switch (UE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ OS << *UE.getSubExpr();
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
+ OS << *BE.getLHS();
+ } else {
+ OS << '(' << *BE.getLHS() << ')';
+ }
+
+ switch (BE.getOpcode()) {
+ default: assert(0 && "Invalid opcode!");
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ case MCBinaryExpr::And: OS << '&'; break;
+ case MCBinaryExpr::Div: OS << '/'; break;
+ case MCBinaryExpr::EQ: OS << "=="; break;
+ case MCBinaryExpr::GT: OS << '>'; break;
+ case MCBinaryExpr::GTE: OS << ">="; break;
+ case MCBinaryExpr::LAnd: OS << "&&"; break;
+ case MCBinaryExpr::LOr: OS << "||"; break;
+ case MCBinaryExpr::LT: OS << '<'; break;
+ case MCBinaryExpr::LTE: OS << "<="; break;
+ case MCBinaryExpr::Mod: OS << '%'; break;
+ case MCBinaryExpr::Mul: OS << '*'; break;
+ case MCBinaryExpr::NE: OS << "!="; break;
+ case MCBinaryExpr::Or: OS << '|'; break;
+ case MCBinaryExpr::Shl: OS << "<<"; break;
+ case MCBinaryExpr::Shr: OS << ">>"; break;
+ case MCBinaryExpr::Sub: OS << '-'; break;
+ case MCBinaryExpr::Xor: OS << '^'; break;
+ }
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ OS << *BE.getRHS();
+ } else {
+ OS << '(' << *BE.getRHS() << ')';
+ }
+ return;
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+}
+
+void MCExpr::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+ const MCExpr *RHS, MCContext &Ctx) {
+ return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+ return new (Ctx) MCConstantExpr(Value);
+}
+
+/* *** */
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+ VariantKind Kind,
+ MCContext &Ctx) {
+ return new (Ctx) MCSymbolRefExpr(Sym, Kind);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+ MCContext &Ctx) {
+ return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
+}
+
+StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
+ switch (Kind) {
+ default:
+ case VK_Invalid: return "<<invalid>>";
+ case VK_None: return "<<none>>";
+
+ case VK_GOT: return "GOT";
+ case VK_GOTOFF: return "GOTOFF";
+ case VK_GOTPCREL: return "GOTPCREL";
+ case VK_GOTTPOFF: return "GOTTPOFF";
+ case VK_INDNTPOFF: return "INDNTPOFF";
+ case VK_NTPOFF: return "NTPOFF";
+ case VK_GOTNTPOFF: return "GOTNTPOFF";
+ case VK_PLT: return "PLT";
+ case VK_TLSGD: return "TLSGD";
+ case VK_TLSLD: return "TLSLD";
+ case VK_TLSLDM: return "TLSLDM";
+ case VK_TPOFF: return "TPOFF";
+ case VK_DTPOFF: return "DTPOFF";
+ case VK_TLVP: return "TLVP";
+ case VK_ARM_PLT: return "(PLT)";
+ case VK_ARM_GOT: return "(GOT)";
+ case VK_ARM_GOTOFF: return "(GOTOFF)";
+ case VK_ARM_TPOFF: return "(tpoff)";
+ case VK_ARM_GOTTPOFF: return "(gottpoff)";
+ case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_PPC_TOC: return "toc";
+ case VK_PPC_DARWIN_HA16: return "ha16";
+ case VK_PPC_DARWIN_LO16: return "lo16";
+ case VK_PPC_GAS_HA16: return "ha";
+ case VK_PPC_GAS_LO16: return "l";
+ }
+}
+
+MCSymbolRefExpr::VariantKind
+MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
+ return StringSwitch<VariantKind>(Name)
+ .Case("GOT", VK_GOT)
+ .Case("got", VK_GOT)
+ .Case("GOTOFF", VK_GOTOFF)
+ .Case("gotoff", VK_GOTOFF)
+ .Case("GOTPCREL", VK_GOTPCREL)
+ .Case("gotpcrel", VK_GOTPCREL)
+ .Case("GOTTPOFF", VK_GOTTPOFF)
+ .Case("gottpoff", VK_GOTTPOFF)
+ .Case("INDNTPOFF", VK_INDNTPOFF)
+ .Case("indntpoff", VK_INDNTPOFF)
+ .Case("NTPOFF", VK_NTPOFF)
+ .Case("ntpoff", VK_NTPOFF)
+ .Case("GOTNTPOFF", VK_GOTNTPOFF)
+ .Case("gotntpoff", VK_GOTNTPOFF)
+ .Case("PLT", VK_PLT)
+ .Case("plt", VK_PLT)
+ .Case("TLSGD", VK_TLSGD)
+ .Case("tlsgd", VK_TLSGD)
+ .Case("TLSLD", VK_TLSLD)
+ .Case("tlsld", VK_TLSLD)
+ .Case("TLSLDM", VK_TLSLDM)
+ .Case("tlsldm", VK_TLSLDM)
+ .Case("TPOFF", VK_TPOFF)
+ .Case("tpoff", VK_TPOFF)
+ .Case("DTPOFF", VK_DTPOFF)
+ .Case("dtpoff", VK_DTPOFF)
+ .Case("TLVP", VK_TLVP)
+ .Case("tlvp", VK_TLVP)
+ .Default(VK_Invalid);
+}
+
+/* *** */
+
+void MCTargetExpr::Anchor() {}
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+ return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout,
+ const SectionAddrMap &Addrs) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+ return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs) const {
+ MCValue Value;
+
+ // Fast path constants.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
+ Res = CE->getValue();
+ return true;
+ }
+
+ // FIXME: The use of InSet = Addrs is a hack. Setting InSet causes us to
+ // absolutize differences across sections, and that is what the MachO writer
+ // uses Addrs for.
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
+
+ // Record the current value.
+ Res = Value.getConstant();
+
+ return IsRelocatable && Value.isAbsolute();
+}
+
+/// \brief Helper method for \see EvaluateSymbolicAdd().
+static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B,
+ int64_t &Addend) {
+ if (!A || !B)
+ return;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+
+ if (SA.isUndefined() || SB.isUndefined())
+ return;
+
+ if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ return;
+
+ MCSymbolData &AD = Asm->getSymbolData(SA);
+ MCSymbolData &BD = Asm->getSymbolData(SB);
+
+ if (AD.getFragment() == BD.getFragment()) {
+ Addend += (AD.getOffset() - BD.getOffset());
+
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+ return;
+ }
+
+ if (!Layout)
+ return;
+
+ const MCSectionData &SecA = *AD.getFragment()->getParent();
+ const MCSectionData &SecB = *BD.getFragment()->getParent();
+
+ if ((&SecA != &SecB) && !Addrs)
+ return;
+
+ // Eagerly evaluate.
+ Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
+ Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+ if (Addrs && (&SecA != &SecB))
+ Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+}
+
+/// \brief Evaluate the result of an add between (conceptually) two MCValues.
+///
+/// This routine conceptually attempts to construct an MCValue:
+/// Result = (Result_A - Result_B + Result_Cst)
+/// from two MCValue's LHS and RHS where
+/// Result = LHS + RHS
+/// and
+/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+///
+/// This routine attempts to aggressively fold the operands such that the result
+/// is representable in an MCValue, but may not always succeed.
+///
+/// \returns True on success, false if the result is not representable in an
+/// MCValue.
+
+/// NOTE: It is really important to have both the Asm and Layout arguments.
+/// They might look redundant, but this function can be used before layout
+/// is done (see the object streamer for example) and having the Asm argument
+/// lets us avoid relaxations early.
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCValue &LHS, const MCSymbolRefExpr *RHS_A,
+ const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
+ MCValue &Res) {
+ // FIXME: This routine (and other evaluation parts) is *incredibly* sloppy
+ // about dealing with modifiers. This will ultimately bite us, one day.
+ const MCSymbolRefExpr *LHS_A = LHS.getSymA();
+ const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+ int64_t LHS_Cst = LHS.getConstant();
+
+ // Fold the result constant immediately.
+ int64_t Result_Cst = LHS_Cst + RHS_Cst;
+
+ assert((!Layout || Asm) &&
+ "Must have an assembler object if layout is given!");
+
+ // If we have a layout, we can fold resolved differences.
+ if (Asm) {
+ // First, fold out any differences which are fully resolved. By
+ // reassociating terms in
+ // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+ // we have the four possible differences:
+ // (LHS_A - LHS_B),
+ // (LHS_A - RHS_B),
+ // (RHS_A - LHS_B),
+ // (RHS_A - RHS_B).
+ // Since we are attempting to be as aggressive as possible about folding, we
+ // attempt to evaluate each possible alternative.
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
+ Result_Cst);
+ }
+
+ // We can't represent the addition or subtraction of two symbols.
+ if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
+ return false;
+
+ // At this point, we have at most one additive symbol and one subtractive
+ // symbol -- find them.
+ const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
+ const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
+
+ // If we have a negated symbol, then we must also have a non-negated
+ // symbol in order to encode the expression.
+ if (B && !A)
+ return false;
+
+ Res = MCValue::get(A, B, Result_Cst);
+ return true;
+}
+
+bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout,
+ 0, false);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
+ ++stats::MCExprEvaluate;
+
+ switch (getKind()) {
+ case Target:
+ return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
+
+ case Constant:
+ Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
+ return true;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ // Evaluate recursively if this is a variable.
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm,
+ Layout,
+ Addrs,
+ true);
+ // If we failed to simplify this to a constant, let the target
+ // handle it.
+ if (Ret && !Res.getSymA() && !Res.getSymB())
+ return true;
+ }
+
+ Res = MCValue::get(SRE, 0, 0);
+ return true;
+ }
+
+ case Unary: {
+ const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
+ MCValue Value;
+
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ switch (AUE->getOpcode()) {
+ case MCUnaryExpr::LNot:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(!Value.getConstant());
+ break;
+ case MCUnaryExpr::Minus:
+ /// -(a - b + const) ==> (b - a - const)
+ if (Value.getSymA() && !Value.getSymB())
+ return false;
+ Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+ -Value.getConstant());
+ break;
+ case MCUnaryExpr::Not:
+ if (!Value.isAbsolute())
+ return false;
+ Res = MCValue::get(~Value.getConstant());
+ break;
+ case MCUnaryExpr::Plus:
+ Res = Value;
+ break;
+ }
+
+ return true;
+ }
+
+ case Binary: {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
+ MCValue LHSValue, RHSValue;
+
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+ Addrs, InSet))
+ return false;
+
+ // We only support a few operations on non-constant expressions; handle
+ // those first.
+ if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
+ switch (ABE->getOpcode()) {
+ default:
+ return false;
+ case MCBinaryExpr::Sub:
+ // Negate RHS and add.
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymB(), RHSValue.getSymA(),
+ -RHSValue.getConstant(),
+ Res);
+
+ case MCBinaryExpr::Add:
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
+ RHSValue.getSymA(), RHSValue.getSymB(),
+ RHSValue.getConstant(),
+ Res);
+ }
+ }
+
+ // FIXME: We need target hooks for the evaluation. It may be limited in
+ // width, and gas defines the result of comparisons and right shifts
+ // differently from Apple as.
+ int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
+ int64_t Result = 0;
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::Add: Result = LHS + RHS; break;
+ case MCBinaryExpr::And: Result = LHS & RHS; break;
+ case MCBinaryExpr::Div: Result = LHS / RHS; break;
+ case MCBinaryExpr::EQ: Result = LHS == RHS; break;
+ case MCBinaryExpr::GT: Result = LHS > RHS; break;
+ case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
+ case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
+ case MCBinaryExpr::LOr: Result = LHS || RHS; break;
+ case MCBinaryExpr::LT: Result = LHS < RHS; break;
+ case MCBinaryExpr::LTE: Result = LHS <= RHS; break;
+ case MCBinaryExpr::Mod: Result = LHS % RHS; break;
+ case MCBinaryExpr::Mul: Result = LHS * RHS; break;
+ case MCBinaryExpr::NE: Result = LHS != RHS; break;
+ case MCBinaryExpr::Or: Result = LHS | RHS; break;
+ case MCBinaryExpr::Shl: Result = LHS << RHS; break;
+ case MCBinaryExpr::Shr: Result = LHS >> RHS; break;
+ case MCBinaryExpr::Sub: Result = LHS - RHS; break;
+ case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
+ }
+
+ Res = MCValue::get(Result);
+ return true;
+ }
+ }
+
+ assert(0 && "Invalid assembly expression kind!");
+ return false;
+}
+
+const MCSection *MCExpr::FindAssociatedSection() const {
+ switch (getKind()) {
+ case Target:
+ // We never look through target specific expressions.
+ return cast<MCTargetExpr>(this)->FindAssociatedSection();
+
+ case Constant:
+ return MCSymbol::AbsolutePseudoSection;
+
+ case SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+ const MCSymbol &Sym = SRE->getSymbol();
+
+ if (Sym.isDefined())
+ return &Sym.getSection();
+
+ return 0;
+ }
+
+ case Unary:
+ return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+
+ case Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
+ const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
+ const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+
+ // If either section is absolute, return the other.
+ if (LHS_S == MCSymbol::AbsolutePseudoSection)
+ return RHS_S;
+ if (RHS_S == MCSymbol::AbsolutePseudoSection)
+ return LHS_S;
+
+ // Otherwise, return the first non-null section.
+ return LHS_S ? LHS_S : RHS_S;
+ }
+ }
+
+ assert(0 && "Invalid assembly expression kind!");
+ return 0;
+}
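
A short sketch of the folding path implemented above: an expression such as (end - start) + 8 stays relocatable until both symbols have symbol data in the same fragment (or a layout is available), at which point AttemptToFoldSymbolOffsetDifference collapses the difference and EvaluateAsAbsolute returns a plain constant. The symbol names are illustrative; Asm stands for an assembler that has already emitted both labels.

    #include "llvm/MC/MCAssembler.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    static bool foldSizePlus8(MCContext &Ctx, const MCAssembler &Asm,
                              int64_t &Res) {
      const MCExpr *Start =
          MCSymbolRefExpr::Create("start", MCSymbolRefExpr::VK_None, Ctx);
      const MCExpr *End =
          MCSymbolRefExpr::Create("end", MCSymbolRefExpr::VK_None, Ctx);
      const MCExpr *Diff = MCBinaryExpr::Create(MCBinaryExpr::Sub, End, Start, Ctx);
      const MCExpr *Expr = MCBinaryExpr::Create(
          MCBinaryExpr::Add, Diff, MCConstantExpr::Create(8, Ctx), Ctx);
      // True only when the difference is fully resolved (same fragment, or a
      // layout that pins both symbols); Res then holds (end - start) + 8.
      return Expr->EvaluateAsAbsolute(Res, Asm);
    }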
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
new file mode 100644
index 000000000000..4cb628b395c3
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -0,0 +1,66 @@
+//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCOperand ";
+ if (!isValid())
+ OS << "INVALID";
+ else if (isReg())
+ OS << "Reg:" << getReg();
+ else if (isImm())
+ OS << "Imm:" << getImm();
+ else if (isExpr()) {
+ OS << "Expr:(" << *getExpr() << ")";
+ } else
+ OS << "UNDEFINED";
+ OS << ">";
+}
+
+void MCOperand::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << "<MCInst " << getOpcode();
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << " ";
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
+ const MCInstPrinter *Printer,
+ StringRef Separator) const {
+ OS << "<MCInst #" << getOpcode();
+
+ // Show the instruction opcode name if we have access to a printer.
+ if (Printer)
+ OS << ' ' << Printer->getOpcodeName(getOpcode());
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << Separator;
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">";
+}
+
+void MCInst::dump() const {
+ print(dbgs(), 0);
+ dbgs() << "\n";
+}
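
For reference, a minimal sketch of the kind of object these printers consume. The opcode and register numbers are target-specific enum values, so the literals below are placeholders; setOpcode/addOperand/CreateReg/CreateImm are assumed from MCInst.h in this tree.

    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void buildAndPrint(raw_ostream &OS) {
      MCInst Inst;
      Inst.setOpcode(1);                         // placeholder target opcode
      Inst.addOperand(MCOperand::CreateReg(3));  // placeholder register number
      Inst.addOperand(MCOperand::CreateImm(42));
      Inst.print(OS, 0);  // e.g. "<MCInst 1 <MCOperand Reg:3> <MCOperand Imm:42>>"
      OS << '\n';
    }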
diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 000000000000..81a939ffaaef
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,25 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
+
+/// getOpcodeName - Return the name of the specified opcode enum (e.g.
+/// "MOV32ri") or empty if we can't resolve it.
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return "";
+}
+
+void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ assert(0 && "Target should implement this");
+}
diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp
new file mode 100644
index 000000000000..9c0fc92e6c05
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCLabel.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+ OS << '"' << getInstance() << '"';
+}
+
+void MCLabel::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
new file mode 100644
index 000000000000..309752ec5f02
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
@@ -0,0 +1,249 @@
+//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCLoggingStreamer : public MCStreamer {
+ llvm::OwningPtr<MCStreamer> Child;
+
+ raw_ostream &OS;
+
+public:
+ MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
+ : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
+
+ void LogCall(const char *Function) {
+ OS << Function << "\n";
+ }
+
+ void LogCall(const char *Function, const Twine &Message) {
+ OS << Function << ": " << Message << "\n";
+ }
+
+ virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
+
+ virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
+
+ virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
+
+ virtual void AddComment(const Twine &T) {
+ LogCall("AddComment", T);
+ return Child->AddComment(T);
+ }
+
+ virtual void AddBlankLine() {
+ LogCall("AddBlankLine");
+ return Child->AddBlankLine();
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
+ LogCall("ChangeSection");
+ return Child->ChangeSection(Section);
+ }
+
+ virtual void InitSections() {
+ LogCall("InitSections");
+ return Child->InitSections();
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ LogCall("EmitLabel");
+ return Child->EmitLabel(Symbol);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ LogCall("EmitAssemblerFlag");
+ return Child->EmitAssemblerFlag(Flag);
+ }
+
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ LogCall("EmitThumbFunc");
+ return Child->EmitThumbFunc(Func);
+ }
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitAssignment");
+ return Child->EmitAssignment(Symbol, Value);
+ }
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ LogCall("EmitWeakReference");
+ return Child->EmitWeakReference(Alias, Symbol);
+ }
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ LogCall("EmitDwarfAdvanceLineAddr");
+ return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ PointerSize);
+ }
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ LogCall("EmitSymbolAttribute");
+ return Child->EmitSymbolAttribute(Symbol, Attribute);
+ }
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ LogCall("EmitSymbolDesc");
+ return Child->EmitSymbolDesc(Symbol, DescValue);
+ }
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ LogCall("BeginCOFFSymbolDef");
+ return Child->BeginCOFFSymbolDef(Symbol);
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ LogCall("EmitCOFFSymbolStorageClass");
+ return Child->EmitCOFFSymbolStorageClass(StorageClass);
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ LogCall("EmitCOFFSymbolType");
+ return Child->EmitCOFFSymbolType(Type);
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ LogCall("EndCOFFSymbolDef");
+ return Child->EndCOFFSymbolDef();
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitELFSize");
+ return Child->EmitELFSize(Symbol, Value);
+ }
+
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ LogCall("EmitCommonSymbol");
+ return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ LogCall("EmitLocalCommonSymbol");
+ return Child->EmitLocalCommonSymbol(Symbol, Size);
+ }
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ LogCall("EmitZerofill");
+ return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ LogCall("EmitTBSSSymbol");
+ return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ LogCall("EmitBytes");
+ return Child->EmitBytes(Data, AddrSpace);
+ }
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace){
+ LogCall("EmitValue");
+ return Child->EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitULEB128Value(const MCExpr *Value) {
+ LogCall("EmitULEB128Value");
+ return Child->EmitULEB128Value(Value);
+ }
+
+ virtual void EmitSLEB128Value(const MCExpr *Value) {
+ LogCall("EmitSLEB128Value");
+ return Child->EmitSLEB128Value(Value);
+ }
+
+ virtual void EmitGPRel32Value(const MCExpr *Value) {
+ LogCall("EmitGPRel32Value");
+ return Child->EmitGPRel32Value(Value);
+ }
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ LogCall("EmitFill");
+ return Child->EmitFill(NumBytes, FillValue, AddrSpace);
+ }
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitValueToAlignment");
+ return Child->EmitValueToAlignment(ByteAlignment, Value,
+ ValueSize, MaxBytesToEmit);
+ }
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitCodeAlignment");
+ return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
+ }
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {
+ LogCall("EmitValueToOffset");
+ return Child->EmitValueToOffset(Offset, Value);
+ }
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ LogCall("EmitFileDirective", "FileName:" + Filename);
+ return Child->EmitFileDirective(Filename);
+ }
+
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ LogCall("EmitDwarfFileDirective",
+ "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
+ return Child->EmitDwarfFileDirective(FileNo, Filename);
+ }
+
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName) {
+ LogCall("EmitDwarfLocDirective",
+ "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
+ " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
+ " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
+ return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator, FileName);
+ }
+
+ virtual void EmitInstruction(const MCInst &Inst) {
+ LogCall("EmitInstruction");
+ return Child->EmitInstruction(Inst);
+ }
+
+ virtual void EmitRawText(StringRef String) {
+ LogCall("EmitRawText", "\"" + String + "\"");
+ return Child->EmitRawText(String);
+ }
+
+ virtual void Finish() {
+ LogCall("Finish");
+ return Child->Finish();
+ }
+
+};
+
+} // end anonymous namespace.
+
+MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
+ return new MCLoggingStreamer(Child, OS);
+}
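
A quick sketch of how the logger is meant to be layered: wrap any concrete streamer so each MCStreamer call is traced to a stream before being forwarded. The wrapped streamer is owned by the logger (the OwningPtr above).

    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Inner is any streamer built by the factories in this patch
    // (createELFStreamer, createMachOStreamer, createNullStreamer, ...).
    static MCStreamer *wrapWithLogging(MCStreamer *Inner) {
      return createLoggingStreamer(Inner, errs()); // takes ownership of Inner
    }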
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
new file mode 100644
index 000000000000..1b21249ca321
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -0,0 +1,420 @@
+//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCMachOStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
+ }
+
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCMachOStreamer::InitSections() {
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+
+}
+
+void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+ MCSymbolData &SD =
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ if (SD.isExternal())
+ EmitSymbolAttribute(EHSymbol, MCSA_Global);
+ if (SD.getFlags() & SF_WeakDefinition)
+ EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+ if (SD.isPrivateExtern())
+ EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // isSymbolLinkerVisible uses the section.
+ Symbol->setSection(*getCurrentSection());
+ // We have to create a new fragment if this is an atom defining symbol;
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(*Symbol))
+ new MCDataFragment(getCurrentSectionData());
+
+ MCObjectStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
+ // to clear the weak reference and weak definition bits too, but the
+ // implementation was buggy. For now we just try to match 'as', for
+ // diffability.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+}
+
+void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().HandleAssemblerFlag(Flag);
+ // Do any generic stuff we need to do.
+ switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ default:
+ llvm_unreachable("invalid assembler flag!");
+ }
+}
+
+void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
+
+ // Remember that the function is a thumb function. Fixup and relocation
+ // values will need to be adjusted.
+ getAssembler().setIsThumbFunc(Symbol);
+
+ // Mark the thumb bit on the symbol.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setFlags(SD.getFlags() | SF_ThumbFunc);
+}
+
+void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol; note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to be desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more
+ // well-defined.
+ switch (Attribute) {
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeFunction:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_ELF_TypeObject:
+ case MCSA_ELF_TypeTLS:
+ case MCSA_ELF_TypeCommon:
+ case MCSA_ELF_TypeNoType:
+ case MCSA_ELF_TypeGnuUniqueObject:
+ case MCSA_IndirectSymbol:
+ case MCSA_Hidden:
+ case MCSA_Internal:
+ case MCSA_Protected:
+ case MCSA_Weak:
+ case MCSA_Local:
+ assert(0 && "Invalid symbol attribute for Mach-O!");
+ break;
+
+ case MCSA_Global:
+ SD.setExternal(true);
+ // This effectively clears the undefined lazy bit, in Darwin 'as', although
+ // it isn't very consistent because it implements this as part of symbol
+ // lookup.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ case MCSA_LazyReference:
+ // FIXME: This requires -dynamic.
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+ break;
+
+ // Since .reference sets the no dead strip bit, it is equivalent to
+ // .no_dead_strip in practice.
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+ break;
+
+ case MCSA_SymbolResolver:
+ SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+ break;
+
+ case MCSA_PrivateExtern:
+ SD.setExternal(true);
+ SD.setPrivateExtern(true);
+ break;
+
+ case MCSA_WeakReference:
+ // FIXME: This requires -dynamic.
+ if (Symbol->isUndefined())
+ SD.setFlags(SD.getFlags() | SF_WeakReference);
+ break;
+
+ case MCSA_WeakDefinition:
+ // FIXME: 'as' enforces that this is defined and global. The manual claims
+ // it has to be in a coalesced section, but this isn't enforced.
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+ break;
+
+ case MCSA_WeakDefAutoPrivate:
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+ break;
+ }
+}
+
+void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ // Encode the 'desc' value into the lowest implementation defined bits.
+ assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+ "Invalid .desc value!");
+ getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
+ DescValue & SF_DescFlagsMask);
+}
+
+void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setExternal(true);
+ SD.setCommon(Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+
+ // The symbol may not be present, which only creates the section.
+ if (!Symbol)
+ return;
+
+ // FIXME: Assert that this section has the zerofill type.
+
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // Emit an align fragment if necessary.
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD.setFragment(F);
+
+ Symbol->setSection(*Section);
+
+ // Update the maximum alignment on the zero fill section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+}
+
+// This should always be called with the thread local bss section. Like the
+// .zerofill directive this doesn't actually switch sections on us.
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ return;
+}
+
+void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCMachOStreamer::Finish() {
+ EmitFrames(true);
+
+ // We have to set the fragment atom associations so we can relax properly for
+ // Mach-O.
+
+ // First, scan the symbol table to build a lookup table from fragments to
+ // defining symbols.
+ DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
+ for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(),
+ ie = getAssembler().symbol_end(); it != ie; ++it) {
+ if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) &&
+ it->getFragment()) {
+ // An atom defining symbol should never be internal to a fragment.
+ assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[it->getFragment()] = it;
+ }
+ }
+
+ // Set the fragment atom associations by tracking the last seen atom defining
+ // symbol.
+ for (MCAssembler::iterator it = getAssembler().begin(),
+ ie = getAssembler().end(); it != ie; ++it) {
+ MCSymbolData *CurrentAtom = 0;
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
+ CurrentAtom = SD;
+ it2->setAtom(CurrentAtom);
+ }
+ }
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
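
A small sketch of the EmitZerofill path above: reserve zero-initialized, aligned storage for a symbol in a Mach-O zerofill section. MCSectionMachO::S_ZEROFILL is assumed to be the section type constant in this tree, and the segment and symbol names are illustrative.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    static void reserveBuffer(MCStreamer &S) {
      MCContext &Ctx = S.getContext();
      const MCSection *Bss =
          Ctx.getMachOSection("__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
                              0, SectionKind::getBSS());
      // 64 bytes, 16-byte aligned: emits an align fragment plus a fill fragment.
      S.EmitZerofill(Bss, Ctx.GetOrCreateSymbol("_buffer"), 64, 16);
    }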
diff --git a/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
new file mode 100644
index 000000000000..146cebf01a3a
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
@@ -0,0 +1,22 @@
+//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+
+using namespace llvm;
+
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(
+ bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
+ bool UseAggressiveSymbolFolding_)
+ : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
+ UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+}
+
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
new file mode 100644
index 000000000000..9577af010205
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -0,0 +1,104 @@
+//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+namespace {
+
+ class MCNullStreamer : public MCStreamer {
+ public:
+ MCNullStreamer(MCContext &Context) : MCStreamer(Context) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections() {
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {}
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {}
+ virtual void EmitULEB128Value(const MCExpr *Value) {}
+ virtual void EmitSLEB128Value(const MCExpr *Value) {}
+ virtual void EmitGPRel32Value(const MCExpr *Value) {}
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {}
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {}
+
+ virtual void EmitFileDirective(StringRef Filename) {}
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ return false;
+ }
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator,
+ StringRef FileName) {}
+ virtual void EmitInstruction(const MCInst &Inst) {}
+
+ virtual void Finish() {}
+
+ /// @}
+ };
+
+}
+
+MCStreamer *llvm::createNullStreamer(MCContext &Context) {
+ return new MCNullStreamer(Context);
+}
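
The null streamer is the cheapest possible sink: it accepts the full MCStreamer API and discards everything, which is useful for syntax-checking or timing a parse without producing output. A one-line sketch:

    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    static MCStreamer *makeSink(MCContext &Ctx) {
      return createNullStreamer(Ctx);  // every Emit* call becomes a no-op
    }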
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
new file mode 100644
index 000000000000..8635aac00302
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -0,0 +1,254 @@
+//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter_)
+ : MCStreamer(Context),
+ Assembler(new MCAssembler(Context, TAB,
+ *Emitter_, *TAB.createObjectWriter(OS),
+ OS)),
+ CurSectionData(0)
+{
+}
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter_,
+ MCAssembler *_Assembler)
+ : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0)
+{
+}
+
+MCObjectStreamer::~MCObjectStreamer() {
+ delete &Assembler->getBackend();
+ delete &Assembler->getEmitter();
+ delete &Assembler->getWriter();
+ delete Assembler;
+}
+
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+ assert(getCurrentSectionData() && "No current section!");
+
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
+
+ return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(getCurrentSectionData());
+ return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+}
+
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ EmitIntValue(AbsValue, Size, AddrSpace);
+ return;
+ }
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ Value,
+ MCFixup::getKindForSize(Size, false)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCStreamer::EmitLabel(Symbol);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful; we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists; otherwise, we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitULEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, false, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitSLEB128IntValue(IntValue);
+ return;
+ }
+ Value = ForceExpAbs(Value);
+ new MCLEBFragment(*Value, true, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ report_fatal_error("This file format doesn't support weak aliases.");
+}
+
+void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+
+ CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+}
+
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MCLineEntry::Make(this, getCurrentSection());
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (getAssembler().getRelaxAll()) {
+ MCInst Relaxed;
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ SmallString<128> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
+ VecOS.flush();
+ IF->getCode().append(Code.begin(), Code.end());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ if (!LastLabel) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+ return;
+ }
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfLineAddr::Emit(this, LineDelta, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(AddrDelta);
+ new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+}
+
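+// EmitValueToOffset is used for directives such as .org: when the target
+// offset is already absolute, an MCOrgFragment pads up to it at layout time;
+// otherwise the distance from the current position must itself evaluate to an
+// assembly-time constant, and that many copies of Value are emitted directly.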
+void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ int64_t Res;
+ if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return;
+ }
+
+ MCSymbol *CurrentPos = getContext().CreateTempSymbol();
+ EmitLabel(CurrentPos);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Ref =
+ MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+ const MCExpr *Delta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+
+ if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+ report_fatal_error("expected assembly-time absolute expression");
+ EmitFill(Res, Value, 0);
+}
+
+void MCObjectStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles())
+ MCDwarfFileTable::Emit(this);
+
+ getAssembler().Finish();
+}
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
new file mode 100644
index 000000000000..efe9f68ee22b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -0,0 +1,80 @@
+//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+MCObjectWriter::~MCObjectWriter() {
+}
+
+/// Utility function to encode a SLEB128 value.
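+/// For example, EncodeSLEB128(-2, OS) emits the single byte 0x7e, while
+/// EncodeSLEB128(64, OS) emits 0xc0 0x00, since bit 6 of the first byte would
+/// otherwise be read back as a sign bit.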
+void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
+ bool More;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ // NOTE: this assumes that this signed shift is an arithmetic right shift.
+ Value >>= 7;
+ More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (More);
+}
+
+/// Utility function to encode a ULEB128 value.
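+/// For example, EncodeULEB128(300, OS) emits the two bytes 0xac 0x02.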
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (Value != 0);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ const MCSymbolRefExpr *A,
+ const MCSymbolRefExpr *B,
+ bool InSet) const {
+ // Modified symbol references cannot be resolved.
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ B->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+ if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ return false;
+
+ const MCSymbolData &DataA = Asm.getSymbolData(SA);
+ const MCSymbolData &DataB = Asm.getSymbolData(SB);
+
+ return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+ *DataB.getFragment(),
+ InSet,
+ false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+ // On ELF and COFF A - B is absolute if A and B are in the same section.
+ return &SecA == &SecB;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
new file mode 100644
index 000000000000..0c1f8f0df774
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -0,0 +1,441 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+ CurBuf = NULL;
+ CurPtr = NULL;
+}
+
+AsmLexer::~AsmLexer() {
+}
+
+void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
+ CurBuf = buf;
+
+ if (ptr)
+ CurPtr = ptr;
+ else
+ CurPtr = CurBuf->getBufferStart();
+
+ TokStart = 0;
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SetError(SMLoc::getFromPointer(Loc), Msg);
+
+ return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+int AsmLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
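+///
+/// For example, "1.5e+10" and ".25" each lex as a single AsmToken::Real token.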
+AsmToken AsmLexer::LexFloatLiteral() {
+ // Skip the fractional digit sequence.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for exponent; we intentionally accept a slightly wider set of
+ // literals here and rely on the upstream client to reject invalid ones (e.g.,
+ // "1e+").
+ if (*CurPtr == 'e' || *CurPtr == 'E') {
+ ++CurPtr;
+ if (*CurPtr == '-' || *CurPtr == '+')
+ ++CurPtr;
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ }
+
+ return AsmToken(AsmToken::Real,
+ StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
+AsmToken AsmLexer::LexIdentifier() {
+ // Check for floating point literals.
+ if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+ // Disambiguate a .1243foo identifier from a floating literal.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ return LexFloatLiteral();
+ }
+
+ while (IsIdentifierChar(*CurPtr))
+ ++CurPtr;
+
+ // Handle . as a special case.
+ if (CurPtr == TokStart+1 && TokStart[0] == '.')
+ return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
+
+ return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+AsmToken AsmLexer::LexSlash() {
+ switch (*CurPtr) {
+ case '*': break; // C style comment.
+ case '/': return ++CurPtr, LexLineComment();
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
+ }
+
+ // C Style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexLineComment: Comment: #[^\n]*
+/// : //[^\n]*
+AsmToken AsmLexer::LexLineComment() {
+ // FIXME: This is broken if we happen to hit a comment at the end of a file, which
+ // was .included, and which doesn't end with a newline.
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+}
+
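+// SkipIgnoredIntegerSuffix - Advance past an "LL" or "ULL" suffix on an
+// integer literal; as noted below, the darwin/x86 assembler accepts these
+// suffixes and simply ignores them.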
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+ if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
+ CurPtr += 2;
+ if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
+ CurPtr += 3;
+}
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+
+/// Decimal integer: [1-9][0-9]*
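+///
+/// For example, "0x1f" lexes to the value 31, "0b101" to 5, and "017" to 15.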
+AsmToken AsmLexer::LexDigit() {
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for floating point literals.
+ if (*CurPtr == '.' || *CurPtr == 'e') {
+ ++CurPtr;
+ return LexFloatLiteral();
+ }
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.getAsInteger(10, Value)) {
+ // Allow positive values that are too large to fit into a signed 64-bit
+ // integer, but that do fit in an unsigned one; we just convert them over.
+ unsigned long long UValue;
+ if (Result.getAsInteger(10, UValue))
+ return ReturnError(TokStart, "invalid decimal number");
+ Value = (long long)UValue;
+ }
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ // See if we actually have "0b" as part of something like "jmp 0b\n"
+ if (!isdigit(CurPtr[0])) {
+ --CurPtr;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ return AsmToken(AsmToken::Integer, Result, 0);
+ }
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "invalid binary number");
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ long long Value;
+ if (Result.substr(2).getAsInteger(2, Value))
+ return ReturnError(TokStart, "invalid binary number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "invalid hexadecimal number");
+
+ unsigned long long Result;
+ if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
+ return ReturnError(TokStart, "invalid hexadecimal number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ (int64_t)Result);
+ }
+
+ // Must be an octal number, it starts with 0.
+ while (*CurPtr >= '0' && *CurPtr <= '9')
+ ++CurPtr;
+
+ StringRef Result(TokStart, CurPtr - TokStart);
+ long long Value;
+ if (Result.getAsInteger(8, Value))
+ return ReturnError(TokStart, "invalid octal number");
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return AsmToken(AsmToken::Integer, Result, Value);
+}
+
+/// LexSingleQuote: Integer: 'b'
+AsmToken AsmLexer::LexSingleQuote() {
+ int CurChar = getNextChar();
+
+ if (CurChar == '\\')
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated single quote");
+
+ CurChar = getNextChar();
+
+ if (CurChar != '\'')
+ return ReturnError(TokStart, "single quote way too long");
+
+ // The idea here is that 'c' is basically just an integral
+ // constant.
+ StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+ long long Value;
+
+ if (Res.startswith("\'\\")) {
+ char theChar = Res[2];
+ switch (theChar) {
+ default: Value = theChar; break;
+ case '\'': Value = '\''; break;
+ case 't': Value = '\t'; break;
+ case 'n': Value = '\n'; break;
+ case 'b': Value = '\b'; break;
+ }
+ } else
+ Value = TokStart[1];
+
+ return AsmToken(AsmToken::Integer, Res, Value);
+}
+
+
+/// LexQuote: String: "..."
+AsmToken AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+}
+
+StringRef AsmLexer::LexUntilEndOfStatement() {
+ TokStart = CurPtr;
+
+ while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ !isAtStatementSeparator(CurPtr) && // End of statement marker.
+ *CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
+bool AsmLexer::isAtStartOfComment(char Char) {
+ // FIXME: This won't work for multi-character comment indicators like "//".
+ return Char == *MAI.getCommentString();
+}
+
+bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
+ return strncmp(Ptr, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0;
+}
+
+AsmToken AsmLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ if (isAtStartOfComment(CurChar))
+ return LexLineComment();
+ if (isAtStatementSeparator(TokStart)) {
+ CurPtr += strlen(MAI.getSeparatorString()) - 1;
+ return AsmToken(AsmToken::EndOfStatement,
+ StringRef(TokStart, strlen(MAI.getSeparatorString())));
+ }
+
+ switch (CurChar) {
+ default:
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ case 0:
+ case ' ':
+ case '\t':
+ // Ignore whitespace.
+ return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r':
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
+ case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
+ case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
+ case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
+ case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
+ case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
+ case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
+ case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
+ case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
+ case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
+ case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
+ case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
+ case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+ case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
+ case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
+ case '=':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '|':
+ if (*CurPtr == '|')
+ return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
+ case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
+ case '&':
+ if (*CurPtr == '&')
+ return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
+ case '!':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
+ case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
+ case '/': return LexSlash();
+ case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '\'': return LexSingleQuote();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+ case '<':
+ switch (*CurPtr) {
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ StringRef(TokStart, 2));
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
+ }
+ case '>':
+ switch (*CurPtr) {
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
+ }
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
+ }
+}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
new file mode 100644
index 000000000000..0c181f39611e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -0,0 +1,2731 @@
+//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include <cctype>
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+FatalAssemblerWarnings("fatal-assembler-warnings",
+ cl::desc("Consider warnings as error"));
+
+namespace {
+
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+ StringRef Name;
+ StringRef Body;
+ std::vector<StringRef> Parameters;
+
+public:
+ Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) :
+ Name(N), Body(B), Parameters(P) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+ /// The macro being instantiated.
+ const Macro *TheMacro;
+
+ /// The macro instantiation with substitutions.
+ MemoryBuffer *Instantiation;
+
+ /// The location of the instantiation.
+ SMLoc InstantiationLoc;
+
+ /// The location where parsing should resume upon instantiation completion.
+ SMLoc ExitLoc;
+
+public:
+ MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ MemoryBuffer *I);
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+ friend class GenericAsmParser;
+
+ AsmParser(const AsmParser &); // DO NOT IMPLEMENT
+ void operator=(const AsmParser &); // DO NOT IMPLEMENT
+private:
+ AsmLexer Lexer;
+ MCContext &Ctx;
+ MCStreamer &Out;
+ const MCAsmInfo &MAI;
+ SourceMgr &SrcMgr;
+ MCAsmParserExtension *GenericParser;
+ MCAsmParserExtension *PlatformParser;
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
+ int CurBuffer;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+ /// DirectiveMap - This is a table of handlers for directives. Each handler is
+ /// invoked after the directive identifier is read and is responsible for
+ /// parsing and validating the rest of the directive. The handler is passed
+ /// in the directive name and the location of the directive keyword.
+ StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+ /// MacroMap - Map of currently defined macros.
+ StringMap<Macro*> MacroMap;
+
+ /// ActiveMacros - Stack of active macro instantiations.
+ std::vector<MacroInstantiation*> ActiveMacros;
+
+ /// Boolean tracking whether macro substitution is enabled.
+ unsigned MacrosEnabled : 1;
+
+ /// Flag tracking whether any errors have been encountered.
+ unsigned HadError : 1;
+
+public:
+ AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI);
+ ~AsmParser();
+
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+ void AddDirectiveHandler(MCAsmParserExtension *Object,
+ StringRef Directive,
+ DirectiveHandler Handler) {
+ DirectiveMap[Directive] = std::make_pair(Object, Handler);
+ }
+
+public:
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual SourceMgr &getSourceManager() { return SrcMgr; }
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+
+ virtual bool Warning(SMLoc L, const Twine &Msg);
+ virtual bool Error(SMLoc L, const Twine &Msg);
+
+ const AsmToken &Lex();
+
+ bool ParseExpression(const MCExpr *&Res);
+ virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+ /// }
+
+private:
+ void CheckForValidSection();
+
+ bool ParseStatement();
+
+ bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+ bool expandMacro(SmallString<256> &Buf, StringRef Body,
+ const std::vector<StringRef> &Parameters,
+ const std::vector<std::vector<AsmToken> > &A,
+ const SMLoc &L);
+ void HandleMacroExit();
+
+ void PrintMacroInstantiations();
+ void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+ bool ShowLine = true) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
+ }
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+
+ /// \brief Reset the current lexer position to that given by \arg Loc. The
+ /// current token is not set; clients should ensure Lex() is called
+ /// subsequently.
+ void JumpToLoc(SMLoc Loc);
+
+ void EatToEndOfStatement();
+
+ /// \brief Parse up to the end of statement and return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ StringRef ParseStringToEndOfStatement();
+
+ bool ParseAssignment(StringRef Name, bool allow_redef);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+ /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \arg Res to the identifier contents.
+ bool ParseIdentifier(StringRef &Res);
+
+ // Directive Parsing.
+
+ // ".ascii", ".asciiz", ".string"
+ bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+ bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
+ bool ParseDirectiveFill(); // ".fill"
+ bool ParseDirectiveSpace(); // ".space"
+ bool ParseDirectiveZero(); // ".zero"
+ bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
+ bool ParseDirectiveOrg(); // ".org"
+ // ".align{,32}", ".p2align{,w,l}"
+ bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+ /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// accepts a single symbol (which should be a label or an external).
+ bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ // ".ifdef" or ".ifndef", depending on expect_defined
+ bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
+
+ const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+};
+
+/// \brief Generic implementations of directive handling, etc., which are
+/// shared (or the default, at least) for all assembler parsers.
+class GenericAsmParser : public MCAsmParserExtension {
+ template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<GenericAsmParser, Handler>);
+ }
+public:
+ GenericAsmParser() {}
+
+ AsmParser &getParser() {
+ return (AsmParser&) this->MCAsmParserExtension::getParser();
+ }
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ // Debugging directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
+
+ // CFI directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFISections>(
+ ".cfi_sections");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
+ ".cfi_startproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
+ ".cfi_endproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
+ ".cfi_def_cfa");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
+ ".cfi_def_cfa_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>(
+ ".cfi_adjust_cfa_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
+ ".cfi_def_cfa_register");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
+ ".cfi_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>(
+ ".cfi_rel_offset");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
+
+ // Macro directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_on");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_off");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
+
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
+ }
+
+ bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISections(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveLEB128(StringRef, SMLoc);
+};
+
+}
+
+namespace llvm {
+
+extern MCAsmParserExtension *createDarwinAsmParser();
+extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
+
+}
+
+enum { DEFAULT_ADDRSPACE = 0 };
+
+AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
+ MCStreamer &_Out, const MCAsmInfo &_MAI)
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
+ GenericParser(new GenericAsmParser), PlatformParser(0),
+ CurBuffer(0), MacrosEnabled(true) {
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ // Initialize the generic parser.
+ GenericParser->Initialize(*this);
+
+ // Initialize the platform / file format parser.
+ //
+ // FIXME: This is a hack; we need to (majorly) clean up how these objects are
+ // created.
+ if (_MAI.hasMicrosoftFastStdCallMangling()) {
+ PlatformParser = createCOFFAsmParser();
+ PlatformParser->Initialize(*this);
+ } else if (_MAI.hasSubsectionsViaSymbols()) {
+ PlatformParser = createDarwinAsmParser();
+ PlatformParser->Initialize(*this);
+ } else {
+ PlatformParser = createELFAsmParser();
+ PlatformParser->Initialize(*this);
+ }
+}
+
+AsmParser::~AsmParser() {
+ assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+ // Destroy any macros.
+ for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end(); it != ie; ++it)
+ delete it->getValue();
+
+ delete PlatformParser;
+ delete GenericParser;
+}
+
+void AsmParser::PrintMacroInstantiations() {
+ // Print the active macro instantiation stack.
+ for (std::vector<MacroInstantiation*>::const_reverse_iterator
+ it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+ PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
+ "note");
+}
+
+bool AsmParser::Warning(SMLoc L, const Twine &Msg) {
+ if (FatalAssemblerWarnings)
+ return Error(L, Msg);
+ PrintMessage(L, Msg, "warning");
+ PrintMacroInstantiations();
+ return false;
+}
+
+bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+ HadError = true;
+ PrintMessage(L, Msg, "error");
+ PrintMacroInstantiations();
+ return true;
+}
+
+bool AsmParser::EnterIncludeFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ CurBuffer = NewBuf;
+
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+ return false;
+}
+
+void AsmParser::JumpToLoc(SMLoc Loc) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
+const AsmToken &AsmParser::Lex() {
+ const AsmToken *tok = &Lexer.Lex();
+
+ if (tok->is(AsmToken::Eof)) {
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ JumpToLoc(ParentIncludeLoc);
+ tok = &Lexer.Lex();
+ }
+ }
+
+ if (tok->is(AsmToken::Error))
+ Error(Lexer.getErrLoc(), Lexer.getErr());
+
+ return *tok;
+}
+
+bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+ // Create the initial section, if requested.
+ if (!NoInitialTextSection)
+ Out.InitSections();
+
+ // Prime the lexer.
+ Lex();
+
+ HadError = false;
+ AsmCond StartingCondState = TheCondState;
+
+ // While we have input, parse each statement.
+ while (Lexer.isNot(AsmToken::Eof)) {
+ if (!ParseStatement()) continue;
+
+ // We had an error, validate that one was emitted and recover by skipping to
+ // the next line.
+ assert(HadError && "Parse statement returned an error, but none emitted!");
+ EatToEndOfStatement();
+ }
+
+ if (TheCondState.TheCond != StartingCondState.TheCond ||
+ TheCondState.Ignore != StartingCondState.Ignore)
+ return TokError("unmatched .ifs or .elses");
+
+ // Check to see there are no empty DwarfFile slots.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ if (!MCDwarfFiles[i])
+ TokError("unassigned file number: " + Twine(i) + " for .file directives");
+ }
+
+ // Check to see that all assembler local symbols were actually defined.
+ // Targets that don't do subsections via symbols may not want this, though,
+ // so conservatively exclude them. Only do this if we're finalizing,
+ // as otherwise we won't necessarily have seen everything yet.
+ if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+ const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+ for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+ e = Symbols.end();
+ i != e; ++i) {
+ MCSymbol *Sym = i->getValue();
+ // Variable symbols may not be marked as defined, so check those
+ // explicitly. If we know it's a variable, we have a definition for
+ // the purposes of this check.
+ if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+ // FIXME: We would really like to refer back to where the symbol was
+ // first referenced for a source location. We need to add something
+ // to track that. Currently, we just point to the end of the file.
+ PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined", "error", false);
+ }
+ }
+
+
+ // Finalize the output stream if there are no errors and if the client wants
+ // us to.
+ if (!HadError && !NoFinalize)
+ Out.Finish();
+
+ return HadError;
+}
+
+void AsmParser::CheckForValidSection() {
+ if (!getStreamer().getCurrentSection()) {
+ TokError("expected section directive before assembly directive");
+ Out.SwitchSection(Ctx.getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+}
+
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::EatToEndOfStatement() {
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ // Eat EOL.
+ if (Lexer.is(AsmToken::EndOfStatement))
+ Lex();
+}
+
+StringRef AsmParser::ParseStringToEndOfStatement() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
+
+/// ParseParenExpr - Parse a paren expression and return it.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RParen))
+ return TokError("expected ')' in parentheses expression");
+ EndLoc = Lexer.getLoc();
+ Lex();
+ return false;
+}
+
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RBrac))
+ return TokError("expected ']' in brackets expression");
+ EndLoc = Lexer.getLoc();
+ Lex();
+ return false;
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+/// primaryexpr ::= (parenexpr
+/// primaryexpr ::= symbol
+/// primaryexpr ::= number
+/// primaryexpr ::= '.'
+/// primaryexpr ::= ~,+,- primaryexpr
+bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ switch (Lexer.getKind()) {
+ default:
+ return TokError("unknown token in expression");
+ // If we have an error assume that we've already handled it.
+ case AsmToken::Error:
+ return true;
+ case AsmToken::Exclaim:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateLNot(Res, getContext());
+ return false;
+ case AsmToken::Dollar:
+ case AsmToken::String:
+ case AsmToken::Identifier: {
+ EndLoc = Lexer.getLoc();
+
+ StringRef Identifier;
+ if (ParseIdentifier(Identifier))
+ return true;
+
+ // This is a symbol reference.
+ std::pair<StringRef, StringRef> Split = Identifier.split('@');
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
+
+ // Lookup the symbol variant if used.
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ if (Split.first.size() != Identifier.size()) {
+ Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+ if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ Variant = MCSymbolRefExpr::VK_None;
+ return TokError("invalid variant '" + Split.second + "'");
+ }
+ }
+
+ // If this is an absolute variable reference, substitute it now to preserve
+ // semantics in the face of reassignment.
+ if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
+ if (Variant)
+ return Error(EndLoc, "unexpected modifier on variable reference");
+
+ Res = Sym->getVariableValue();
+ return false;
+ }
+
+ // Otherwise create a symbol ref.
+ Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+ return false;
+ }
+ case AsmToken::Integer: {
+ SMLoc Loc = getTok().getLoc();
+ int64_t IntVal = getTok().getIntVal();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat token.
+ // Look for 'b' or 'f' following an Integer as a directional label
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ StringRef IDVal = getTok().getString();
+ if (IDVal == "f" || IDVal == "b"){
+ MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+ IDVal == "f" ? 1 : 0);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ if(IDVal == "b" && Sym->isUndefined())
+ return Error(Loc, "invalid reference to undefined symbol");
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ }
+ }
+ return false;
+ }
+ case AsmToken::Real: {
+ APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ Lex(); // Eat token.
+ return false;
+ }
+ case AsmToken::Dot: {
+ // This is a '.' reference, which references the current PC. Emit a
+ // temporary label to the streamer and refer to it.
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ return false;
+ }
+ case AsmToken::LParen:
+ Lex(); // Eat the '('.
+ return ParseParenExpr(Res, EndLoc);
+ case AsmToken::LBrac:
+ if (!PlatformParser->HasBracketExpressions())
+ return TokError("brackets expression not supported on this target");
+ Lex(); // Eat the '['.
+ return ParseBracketExpr(Res, EndLoc);
+ case AsmToken::Minus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateMinus(Res, getContext());
+ return false;
+ case AsmToken::Plus:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreatePlus(Res, getContext());
+ return false;
+ case AsmToken::Tilde:
+ Lex(); // Eat the operator.
+ if (ParsePrimaryExpr(Res, EndLoc))
+ return true;
+ Res = MCUnaryExpr::CreateNot(Res, getContext());
+ return false;
+ }
+}
+
+bool AsmParser::ParseExpression(const MCExpr *&Res) {
+ SMLoc EndLoc;
+ return ParseExpression(Res, EndLoc);
+}
+
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // if there is exactly one symbol.
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return 0;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+ TokError("invalid variant on expression '" +
+ getTok().getIdentifier() + "' (already modified)");
+ return E;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return 0;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+ if (!LHS && !RHS)
+ return 0;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// ParseExpression - Parse an expression and return it.
+///
+/// expr ::= expr &&,|| expr -> lowest.
+/// expr ::= expr |,^,& expr
+/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
+/// expr ::= expr +,- expr
+/// expr ::= expr *,/,%,<<,>> expr -> highest.
+/// expr ::= primaryexpr
+///
+bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ // Parse the expression.
+ Res = 0;
+ if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+ return true;
+
+ // As a special case, we support 'a op b @ modifier' by rewriting the
+ // expression to include the modifier. This is inefficient, but in general we
+ // expect users to use 'a@modifier op b'.
+ if (Lexer.getKind() == AsmToken::At) {
+ Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("unexpected symbol modifier following '@'");
+
+ MCSymbolRefExpr::VariantKind Variant =
+ MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+ if (Variant == MCSymbolRefExpr::VK_Invalid)
+ return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+ const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+ if (!ModifiedRes) {
+ return TokError("invalid modifier '" + getTok().getIdentifier() +
+ "' (no symbols present)");
+ }
+
+ Res = ModifiedRes;
+ Lex();
+ }
+
+ // Try to constant fold it up front, if possible.
+ int64_t Value;
+ if (Res->EvaluateAsAbsolute(Value))
+ Res = MCConstantExpr::Create(Value, getContext());
+
+ return false;
+}
+
+bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ Res = 0;
+ return ParseParenExpr(Res, EndLoc) ||
+ ParseBinOpRHS(1, Res, EndLoc);
+}
+
+bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
+ const MCExpr *Expr;
+
+ SMLoc StartLoc = Lexer.getLoc();
+ if (ParseExpression(Expr))
+ return true;
+
+ if (!Expr->EvaluateAsAbsolute(Res))
+ return Error(StartLoc, "expected absolute expression");
+
+ return false;
+}
+
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
+ switch (K) {
+ default:
+ return 0; // not a binop.
+
+ // Lowest Precedence: &&, ||, @
+ case AsmToken::AmpAmp:
+ Kind = MCBinaryExpr::LAnd;
+ return 1;
+ case AsmToken::PipePipe:
+ Kind = MCBinaryExpr::LOr;
+ return 1;
+
+
+ // Low Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
+ return 2;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
+ return 2;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
+ return 2;
+
+ // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
+ case AsmToken::EqualEqual:
+ Kind = MCBinaryExpr::EQ;
+ return 3;
+ case AsmToken::ExclaimEqual:
+ case AsmToken::LessGreater:
+ Kind = MCBinaryExpr::NE;
+ return 3;
+ case AsmToken::Less:
+ Kind = MCBinaryExpr::LT;
+ return 3;
+ case AsmToken::LessEqual:
+ Kind = MCBinaryExpr::LTE;
+ return 3;
+ case AsmToken::Greater:
+ Kind = MCBinaryExpr::GT;
+ return 3;
+ case AsmToken::GreaterEqual:
+ Kind = MCBinaryExpr::GTE;
+ return 3;
+
+ // High Intermediate Precedence: +, -
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
+ return 4;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
+ return 4;
+
+ // Highest Precedence: *, /, %, <<, >>
+ case AsmToken::Star:
+ Kind = MCBinaryExpr::Mul;
+ return 5;
+ case AsmToken::Slash:
+ Kind = MCBinaryExpr::Div;
+ return 5;
+ case AsmToken::Percent:
+ Kind = MCBinaryExpr::Mod;
+ return 5;
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
+ return 5;
+ case AsmToken::GreaterGreater:
+ Kind = MCBinaryExpr::Shr;
+ return 5;
+ }
+}
+
+
+/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
+/// Res contains the LHS of the expression on input.
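+///
+/// For example, given "a + b * c" the '*' (precedence 5) binds more tightly
+/// than the pending '+' (precedence 4), so the RHS is parsed recursively and
+/// the result is a + (b * c).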
+bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
+ SMLoc &EndLoc) {
+ while (1) {
+ MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
+ unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
+
+ // If the next token is lower precedence than we are allowed to eat, return
+ // successfully with what we ate already.
+ if (TokPrec < Precedence)
+ return false;
+
+ Lex();
+
+ // Eat the next primary expression.
+ const MCExpr *RHS;
+ if (ParsePrimaryExpr(RHS, EndLoc)) return true;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ MCBinaryExpr::Opcode Dummy;
+ unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
+ if (TokPrec < NextTokPrec) {
+ if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+ }
+
+ // Merge LHS and RHS according to operator.
+ Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+ }
+}
+
+
+
+
+/// ParseStatement:
+/// ::= EndOfStatement
+/// ::= Label* Directive ...Operands... EndOfStatement
+/// ::= Label* Identifier OperandList* EndOfStatement
+bool AsmParser::ParseStatement() {
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Out.AddBlankLine();
+ Lex();
+ return false;
+ }
+
+ // Statements always start with an identifier or are a full line comment.
+ AsmToken ID = getTok();
+ SMLoc IDLoc = ID.getLoc();
+ StringRef IDVal;
+ int64_t LocalLabelVal = -1;
+ // A full line comment is a '#' as the first token.
+ if (Lexer.is(AsmToken::Hash)) {
+ EatToEndOfStatement();
+ return false;
+ }
+
+ // Allow an integer followed by a ':' as a directional local label.
+ if (Lexer.is(AsmToken::Integer)) {
+ LocalLabelVal = getTok().getIntVal();
+ if (LocalLabelVal < 0) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+ else {
+ IDVal = getTok().getString();
+ Lex(); // Consume the integer token to be used as an identifier token.
+ if (Lexer.getKind() != AsmToken::Colon) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ }
+ }
+
+ } else if (Lexer.is(AsmToken::Dot)) {
+ // Treat '.' as a valid identifier in this context.
+ Lex();
+ IDVal = ".";
+
+ } else if (ParseIdentifier(IDVal)) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+
+
+ // Handle conditional assembly here before checking for skipping. We
+ // have to do this so that .endif isn't skipped in a ".if 0" block for
+ // example.
+ if (IDVal == ".if")
+ return ParseDirectiveIf(IDLoc);
+ if (IDVal == ".ifdef")
+ return ParseDirectiveIfdef(IDLoc, true);
+ if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
+ return ParseDirectiveIfdef(IDLoc, false);
+ if (IDVal == ".elseif")
+ return ParseDirectiveElseIf(IDLoc);
+ if (IDVal == ".else")
+ return ParseDirectiveElse(IDLoc);
+ if (IDVal == ".endif")
+ return ParseDirectiveEndIf(IDLoc);
+
+ // If we are in a ".if 0" block, ignore this statement.
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ return false;
+ }
+
+ // FIXME: Recurse on local labels?
+
+ // See what kind of statement we have.
+ switch (Lexer.getKind()) {
+ case AsmToken::Colon: {
+ CheckForValidSection();
+
+ // identifier ':' -> Label.
+ Lex();
+
+ // Diagnose attempt to use '.' as a label.
+ if (IDVal == ".")
+ return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
+
+ // Diagnose attempt to use a variable as a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: This doesn't diagnose assignment to a symbol which has been
+ // implicitly marked as external.
+ MCSymbol *Sym;
+ if (LocalLabelVal == -1)
+ Sym = getContext().GetOrCreateSymbol(IDVal);
+ else
+ Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ if (!Sym->isUndefined() || Sym->isVariable())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Emit the label.
+ Out.EmitLabel(Sym);
+
+ // Consume any end of statement token, if present, to avoid spurious
+ // AddBlankLine() calls.
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Lex();
+ if (Lexer.is(AsmToken::Eof))
+ return false;
+ }
+
+ return ParseStatement();
+ }
+
+ case AsmToken::Equal:
+ // identifier '=' ... -> assignment statement
+ Lex();
+
+ return ParseAssignment(IDVal, true);
+
+ default: // Normal instruction or directive.
+ break;
+ }
+
+ // If macros are enabled, check to see if this is a macro instantiation.
+ if (MacrosEnabled)
+ if (const Macro *M = MacroMap.lookup(IDVal))
+ return HandleMacroEntry(IDVal, IDLoc, M);
+
+ // Otherwise, we have a normal instruction or directive.
+ if (IDVal[0] == '.' && IDVal != ".") {
+ // Assembler features
+ if (IDVal == ".set" || IDVal == ".equ")
+ return ParseDirectiveSet(IDVal, true);
+ if (IDVal == ".equiv")
+ return ParseDirectiveSet(IDVal, false);
+
+ // Data directives
+
+ if (IDVal == ".ascii")
+ return ParseDirectiveAscii(IDVal, false);
+ if (IDVal == ".asciz" || IDVal == ".string")
+ return ParseDirectiveAscii(IDVal, true);
+
+ if (IDVal == ".byte")
+ return ParseDirectiveValue(1);
+ if (IDVal == ".short")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".value")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".2byte")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".long")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".int")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".4byte")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".quad")
+ return ParseDirectiveValue(8);
+ if (IDVal == ".8byte")
+ return ParseDirectiveValue(8);
+ if (IDVal == ".single" || IDVal == ".float")
+ return ParseDirectiveRealValue(APFloat::IEEEsingle);
+ if (IDVal == ".double")
+ return ParseDirectiveRealValue(APFloat::IEEEdouble);
+
+ if (IDVal == ".align") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ if (IDVal == ".align32") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
+ if (IDVal == ".balign")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
+ if (IDVal == ".balignw")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
+ if (IDVal == ".balignl")
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
+ if (IDVal == ".p2align")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
+ if (IDVal == ".p2alignw")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
+ if (IDVal == ".p2alignl")
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+
+ if (IDVal == ".org")
+ return ParseDirectiveOrg();
+
+ if (IDVal == ".fill")
+ return ParseDirectiveFill();
+ if (IDVal == ".space" || IDVal == ".skip")
+ return ParseDirectiveSpace();
+ if (IDVal == ".zero")
+ return ParseDirectiveZero();
+
+ // Symbol attribute directives
+
+ if (IDVal == ".globl" || IDVal == ".global")
+ return ParseDirectiveSymbolAttribute(MCSA_Global);
+ // ELF only? Should it be here?
+ if (IDVal == ".local")
+ return ParseDirectiveSymbolAttribute(MCSA_Local);
+ if (IDVal == ".hidden")
+ return ParseDirectiveSymbolAttribute(MCSA_Hidden);
+ if (IDVal == ".indirect_symbol")
+ return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
+ if (IDVal == ".internal")
+ return ParseDirectiveSymbolAttribute(MCSA_Internal);
+ if (IDVal == ".lazy_reference")
+ return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
+ if (IDVal == ".no_dead_strip")
+ return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ if (IDVal == ".symbol_resolver")
+ return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
+ if (IDVal == ".private_extern")
+ return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
+ if (IDVal == ".protected")
+ return ParseDirectiveSymbolAttribute(MCSA_Protected);
+ if (IDVal == ".reference")
+ return ParseDirectiveSymbolAttribute(MCSA_Reference);
+ if (IDVal == ".weak")
+ return ParseDirectiveSymbolAttribute(MCSA_Weak);
+ if (IDVal == ".weak_definition")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
+ if (IDVal == ".weak_reference")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
+ if (IDVal == ".weak_def_can_be_hidden")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+
+ if (IDVal == ".comm" || IDVal == ".common")
+ return ParseDirectiveComm(/*IsLocal=*/false);
+ if (IDVal == ".lcomm")
+ return ParseDirectiveComm(/*IsLocal=*/true);
+
+ if (IDVal == ".abort")
+ return ParseDirectiveAbort();
+ if (IDVal == ".include")
+ return ParseDirectiveInclude();
+
+ if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64")
+ return TokError(Twine(IDVal) + " not supported yet");
+
+ // Look up the handler in the handler table.
+ std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
+ DirectiveMap.lookup(IDVal);
+ if (Handler.first)
+ return (*Handler.second)(Handler.first, IDVal, IDLoc);
+
+ // Target hook for parsing target specific directives.
+ if (!getTargetParser().ParseDirective(ID))
+ return false;
+
+ bool retval = Warning(IDLoc, "ignoring directive for now");
+ EatToEndOfStatement();
+ return retval;
+ }
+
+ CheckForValidSection();
+
+ // Canonicalize the opcode to lower case.
+ SmallString<128> Opcode;
+ for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
+ Opcode.push_back(tolower(IDVal[i]));
+
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+ bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
+ ParsedOperands);
+
+ // Dump the parsed representation, if requested.
+ if (getShowParsedOperands()) {
+ SmallString<256> Str;
+ raw_svector_ostream OS(Str);
+ OS << "parsed instruction: [";
+ for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
+ if (i != 0)
+ OS << ", ";
+ ParsedOperands[i]->print(OS);
+ }
+ OS << "]";
+
+ PrintMessage(IDLoc, OS.str(), "note");
+ }
+
+ // If parsing succeeded, match the instruction.
+ if (!HadError)
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
+ Out);
+
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+
+ // Don't skip the rest of the line, the instruction parser is responsible for
+ // that.
+ return false;
+}
+
+bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,
+ const std::vector<StringRef> &Parameters,
+ const std::vector<std::vector<AsmToken> > &A,
+ const SMLoc &L) {
+ raw_svector_ostream OS(Buf);
+ unsigned NParameters = Parameters.size();
+ if (NParameters != 0 && NParameters != A.size())
+ return Error(L, "Wrong number of arguments");
+
+ while (!Body.empty()) {
+ // Scan for the next substitution.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ if (!NParameters) {
+ // This macro has no parameters, look for $0, $1, etc.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' || isdigit(Next))
+ break;
+ } else {
+ // This macro has parameters, look for \foo, \bar, etc.
+ if (Body[Pos] == '\\' && Pos + 1 != End)
+ break;
+ }
+ }
+
+ // Add the prefix.
+ OS << Body.slice(0, Pos);
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ if (!NParameters) {
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ OS << '$';
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ OS << A.size();
+ break;
+
+ // $[0-9] => argument
+ default: {
+ // Missing arguments are ignored.
+ unsigned Index = Body[Pos+1] - '0';
+ if (Index >= A.size())
+ break;
+
+ // Otherwise substitute with the token values, with spaces eliminated.
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+ break;
+ }
+ }
+ Pos += 2;
+ } else {
+ unsigned I = Pos + 1;
+ while (isalnum(Body[I]) && I + 1 != End)
+ ++I;
+
+ const char *Begin = Body.data() + Pos + 1;
+ StringRef Argument(Begin, I - (Pos + 1));
+ unsigned Index = 0;
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index] == Argument)
+ break;
+
+ // FIXME: We should error at the macro definition.
+ if (Index == NParameters)
+ return Error(L, "Parameter not found");
+
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+
+ Pos += 1 + Argument.size();
+ }
+ // Update the scan point.
+ Body = Body.substr(Pos);
+ }
+
+ // We include the .endmacro in the buffer as our cue to exit the macro
+ // instantiation.
+ OS << ".endmacro\n";
+ return false;
+}
+
+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ MemoryBuffer *I)
+ : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitLoc(EL)
+{
+}
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+ const Macro *M) {
+ // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+ // this, although we should protect against infinite loops.
+ if (ActiveMacros.size() == 20)
+ return TokError("macros cannot be nested more than 20 levels deep");
+
+ // Parse the macro instantiation arguments.
+ std::vector<std::vector<AsmToken> > MacroArguments;
+ MacroArguments.push_back(std::vector<AsmToken>());
+ unsigned ParenLevel = 0;
+ for (;;) {
+ if (Lexer.is(AsmToken::Eof))
+ return TokError("unexpected token in macro instantiation");
+ if (Lexer.is(AsmToken::EndOfStatement))
+ break;
+
+ // If we aren't inside parentheses and this is a comma, start a new token
+ // list.
+ if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+ MacroArguments.push_back(std::vector<AsmToken>());
+ } else {
+ // Adjust the current parentheses level.
+ if (Lexer.is(AsmToken::LParen))
+ ++ParenLevel;
+ else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+ --ParenLevel;
+
+ // Append the token to the current argument list.
+ MacroArguments.back().push_back(getTok());
+ }
+ Lex();
+ }
+
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ StringRef Body = M->Body;
+
+ if (expandMacro(Buf, Body, M->Parameters, MacroArguments, getTok().getLoc()))
+ return true;
+
+ MemoryBuffer *Instantiation =
+ MemoryBuffer::getMemBufferCopy(Buf.str(), "<instantiation>");
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ getTok().getLoc(),
+ Instantiation);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+
+ return false;
+}
+
+void AsmParser::HandleMacroExit() {
+ // Jump to the EndOfStatement we should return to, and consume it.
+ JumpToLoc(ActiveMacros.back()->ExitLoc);
+ Lex();
+
+ // Pop the instantiation entry.
+ delete ActiveMacros.back();
+ ActiveMacros.pop_back();
+}
+
+static void MarkUsed(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Binary:
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+ break;
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ break;
+ case MCExpr::SymbolRef: {
+ static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
+ break;
+ }
+ case MCExpr::Unary:
+ MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+ break;
+ }
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
+ // FIXME: Use better location, we should use proper tokens.
+ SMLoc EqualLoc = Lexer.getLoc();
+
+ const MCExpr *Value;
+ if (ParseExpression(Value))
+ return true;
+
+ MarkUsed(Value);
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in assignment");
+
+ // Error on assignment to '.'.
+ if (Name == ".") {
+ return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported "
+ "(use '.space' or '.org')."));
+ }
+
+ // Eat the end of statement marker.
+ Lex();
+
+ // Validate that the LHS is allowed to be a variable (either it has not been
+ // used as a symbol, or it is an absolute symbol).
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ // Diagnose assignment to a label.
+ //
+ // FIXME: Diagnostics. Note the location of the definition as a label.
+ // FIXME: Diagnose assignment to protected identifier (e.g., register name).
+ if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ ; // Allow redefinitions of undefined symbols only used in directives.
+ else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
+ return Error(EqualLoc, "redefinition of '" + Name + "'");
+ else if (!Sym->isVariable())
+ return Error(EqualLoc, "invalid assignment to '" + Name + "'");
+ else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
+ return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
+ Name + "'");
+
+ // Don't count these checks as uses.
+ Sym->setUsed(false);
+ } else
+ Sym = getContext().GetOrCreateSymbol(Name);
+
+ // FIXME: Handle '.'.
+
+ // Do the assignment.
+ Out.EmitAssignment(Sym, Value);
+
+ return false;
+}
+
+/// ParseIdentifier:
+/// ::= identifier
+/// ::= string
+bool AsmParser::ParseIdentifier(StringRef &Res) {
+ // The assembler has relaxed rules for accepting identifiers; in particular,
+ // we allow things like '.globl $foo', which would normally be separate
+ // tokens. At this level we have already lexed, so we cannot (currently)
+ // handle this as a context-dependent token; instead we detect adjacent
+ // tokens and return the combined identifier.
+ if (Lexer.is(AsmToken::Dollar)) {
+ SMLoc DollarLoc = getLexer().getLoc();
+
+ // Consume the dollar sign, and check for a following identifier.
+ Lex();
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+
+ // We have a '$' followed by an identifier, make sure they are adjacent.
+ if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ return true;
+
+ // Construct the joined identifier and consume the token.
+ Res = StringRef(DollarLoc.getPointer(),
+ getTok().getIdentifier().size() + 1);
+ Lex();
+ return false;
+ }
+
+ if (Lexer.isNot(AsmToken::Identifier) &&
+ Lexer.isNot(AsmToken::String))
+ return true;
+
+ Res = getTok().getIdentifier();
+
+ Lex(); // Consume the identifier token.
+
+ return false;
+}
+
+/// ParseDirectiveSet:
+/// ::= .equ identifier ',' expression
+/// ::= .equiv identifier ',' expression
+/// ::= .set identifier ',' expression
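+///
+/// For example (symbol names are illustrative):
+///   .set stack_size, 0x1000
+///   .equ version, 3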
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
+ StringRef Name;
+
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "'");
+ Lex();
+
+ return ParseAssignment(Name, allow_redef);
+}
+
+bool AsmParser::ParseEscapedString(std::string &Data) {
+ assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
+
+ Data = "";
+ StringRef Str = getTok().getStringContents();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] != '\\') {
+ Data += Str[i];
+ continue;
+ }
+
+ // Recognize escaped characters. Note that the escape handling here loosely
+ // follows Darwin 'as'; notably, hex escapes are not supported.
+ ++i;
+ if (i == e)
+ return TokError("unexpected backslash at end of string");
+
+ // Recognize octal sequences.
+ if ((unsigned) (Str[i] - '0') <= 7) {
+ // Consume up to three octal characters.
+ unsigned Value = Str[i] - '0';
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+
+ if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ ++i;
+ Value = Value * 8 + (Str[i] - '0');
+ }
+ }
+
+ if (Value > 255)
+ return TokError("invalid octal escape sequence (out of range)");
+
+ Data += (unsigned char) Value;
+ continue;
+ }
+
+ // Otherwise recognize individual escapes.
+ switch (Str[i]) {
+ default:
+ // Just reject invalid escape sequences for now.
+ return TokError("invalid escape sequence (unrecognized character)");
+
+ case 'b': Data += '\b'; break;
+ case 'f': Data += '\f'; break;
+ case 'n': Data += '\n'; break;
+ case 'r': Data += '\r'; break;
+ case 't': Data += '\t'; break;
+ case '"': Data += '"'; break;
+ case '\\': Data += '\\'; break;
+ }
+ }
+
+ return false;
+}
+
+/// ParseDirectiveAscii:
+/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
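+///
+/// For example:
+///   .ascii "no terminator"
+///   .asciz "nul-terminated", "strings"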
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (ParseEscapedString(Data))
+ return true;
+
+ getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
+ if (ZeroTerminated)
+ getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
+
+ Lex();
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveValue
+/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
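+///
+/// For example (symbol names are illustrative):
+///   .byte 0x2a
+///   .long 1, 2, 3
+///   .quad some_label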
+bool AsmParser::ParseDirectiveValue(unsigned Size) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (ParseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE);
+ } else
+ getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveRealValue
+/// ::= (.single | .double) [ expression (, expression)* ]
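+///
+/// For example:
+///   .single 2.5, -1.0
+///   .double 3.14159, inf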
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ // We don't truly support arithmetic on floating point expressions, so we
+ // have to manually parse unary prefixes.
+ bool IsNeg = false;
+ if (getLexer().is(AsmToken::Minus)) {
+ Lex();
+ IsNeg = true;
+ } else if (getLexer().is(AsmToken::Plus))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer) &&
+ getLexer().isNot(AsmToken::Real) &&
+ getLexer().isNot(AsmToken::Identifier))
+ return TokError("unexpected token in directive");
+
+ // Convert to an APFloat.
+ APFloat Value(Semantics);
+ StringRef IDVal = getTok().getString();
+ if (getLexer().is(AsmToken::Identifier)) {
+ if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
+ Value = APFloat::getInf(Semantics);
+ else if (!IDVal.compare_lower("nan"))
+ Value = APFloat::getNaN(Semantics, false, ~0);
+ else
+ return TokError("invalid floating point literal");
+ } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
+ APFloat::opInvalidOp)
+ return TokError("invalid floating point literal");
+ if (IsNeg)
+ Value.changeSign();
+
+ // Consume the numeric token.
+ Lex();
+
+ // Emit the value as an integer.
+ APInt AsInt = Value.bitcastToAPInt();
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveSpace
+/// ::= .space expression [ , expression ]
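+///
+/// For example:
+///   .space 64
+///   .skip 16, 0xff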
+bool AsmParser::ParseDirectiveSpace() {
+ CheckForValidSection();
+
+ int64_t NumBytes;
+ if (ParseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.space' directive");
+ Lex();
+
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.space' directive");
+ }
+
+ Lex();
+
+ if (NumBytes <= 0)
+ return TokError("invalid number of bytes in '.space' directive");
+
+ // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+ getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveZero
+/// ::= .zero expression
+bool AsmParser::ParseDirectiveZero() {
+ CheckForValidSection();
+
+ int64_t NumBytes;
+ if (ParseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t Val = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAbsoluteExpression(Val))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zero' directive");
+
+ Lex();
+
+ getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveFill
+/// ::= .fill expression , expression , expression
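+///
+/// For example, to emit four 2-byte values of 0x1234:
+///   .fill 4, 2, 0x1234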
+bool AsmParser::ParseDirectiveFill() {
+ CheckForValidSection();
+
+ int64_t NumValues;
+ if (ParseAbsoluteExpression(NumValues))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillSize;
+ if (ParseAbsoluteExpression(FillSize))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
+
+ int64_t FillExpr;
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.fill' directive");
+
+ Lex();
+
+ if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
+ return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
+
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveOrg
+/// ::= .org expression [ , expression ]
+bool AsmParser::ParseDirectiveOrg() {
+ CheckForValidSection();
+
+ const MCExpr *Offset;
+ if (ParseExpression(Offset))
+ return true;
+
+ // Parse optional fill expression.
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.org' directive");
+ Lex();
+
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.org' directive");
+ }
+
+ Lex();
+
+ // FIXME: Only limited forms of relocatable expressions are accepted here, it
+ // has to be relative to the current section.
+ getStreamer().EmitValueToOffset(Offset, FillExpr);
+
+ return false;
+}
+
+/// ParseDirectiveAlign
+/// ::= {.align, ...} expression [ , expression [ , expression ]]
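+///
+/// For example (whether '.align' takes a byte count or a power of two is
+/// target dependent; '.balign' and '.p2align' are unambiguous):
+///   .balign 8
+///   .p2align 4, 0, 15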
+bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+ CheckForValidSection();
+
+ SMLoc AlignmentLoc = getLexer().getLoc();
+ int64_t Alignment;
+ if (ParseAbsoluteExpression(Alignment))
+ return true;
+
+ SMLoc MaxBytesLoc;
+ bool HasFillExpr = false;
+ int64_t FillExpr = 0;
+ int64_t MaxBytesToFill = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ // The fill expression can be omitted while specifying a maximum number of
+ // alignment bytes, e.g:
+ // .align 3,,4
+ if (getLexer().isNot(AsmToken::Comma)) {
+ HasFillExpr = true;
+ if (ParseAbsoluteExpression(FillExpr))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ MaxBytesLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(MaxBytesToFill))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ }
+ }
+
+ Lex();
+
+ if (!HasFillExpr)
+ FillExpr = 0;
+
+ // Compute alignment in bytes.
+ if (IsPow2) {
+ // FIXME: Diagnose overflow.
+ if (Alignment >= 32) {
+ Error(AlignmentLoc, "invalid alignment value");
+ Alignment = 31;
+ }
+
+ Alignment = 1ULL << Alignment;
+ }
+
+ // Diagnose a nonsensical maximum number of bytes to align.
+ if (MaxBytesLoc.isValid()) {
+ if (MaxBytesToFill < 1) {
+ Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+ "many bytes, ignoring maximum bytes expression");
+ MaxBytesToFill = 0;
+ }
+
+ if (MaxBytesToFill >= Alignment) {
+ Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
+ "has no effect");
+ MaxBytesToFill = 0;
+ }
+ }
+
+ // Check whether we should use optimal code alignment for this .align
+ // directive.
+ bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+ if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
+ ValueSize == 1 && UseCodeAlign) {
+ getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
+ } else {
+ // FIXME: Target specific behavior about how the "extra" bytes are filled.
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+ MaxBytesToFill);
+ }
+
+ return false;
+}
+
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
+bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveComm
+/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
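+///
+/// For example (the alignment operand is given as a power of two here, but
+/// some targets take it in bytes; see the handling below):
+///   .comm shared_buf, 256, 4
+///   .lcomm scratch, 64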
+bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ CheckForValidSection();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+
+ // If this target takes alignments in bytes (not log) validate and convert.
+ if (Lexer.getMAI().getAlignmentIsInBytes()) {
+ if (!isPowerOf2_64(Pow2Alignment))
+ return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
+ Pow2Alignment = Log2_64(Pow2Alignment);
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+ // NOTE: a size of zero for a '.comm' should create an undefined symbol,
+ // but a size of zero for a '.lcomm' creates a BSS symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // '.lcomm' is equivalent to '.zerofill'.
+ // Create the Symbol as a common or local common with Size and Pow2Alignment
+ if (IsLocal) {
+ getStreamer().EmitZerofill(Ctx.getMachOSection(
+ "__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+ return false;
+ }
+
+ getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+}
+
+/// ParseDirectiveAbort
+/// ::= .abort [... message ...]
+bool AsmParser::ParseDirectiveAbort() {
+ // FIXME: Use loc from directive.
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef Str = ParseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.abort' directive");
+
+ Lex();
+
+ if (Str.empty())
+ Error(Loc, ".abort detected. Assembly stopping.");
+ else
+ Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
+
+ return false;
+}
+
+/// ParseDirectiveInclude
+/// ::= .include "filename"
+bool AsmParser::ParseDirectiveInclude() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.include' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncludeLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.include' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to switch the lexer to the included file before consuming the end
+ // of statement to avoid losing it when we switch.
+ if (EnterIncludeFile(Filename)) {
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIf
+/// ::= .if expression
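+///
+/// For example (VERSION is assumed to be an absolute symbol defined earlier):
+///   .if VERSION >= 2
+///     .byte 2
+///   .else
+///     .byte 1
+///   .endif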
+bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.if' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+ StringRef Name;
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ } else {
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '.ifdef'");
+
+ Lex();
+
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+ if (expect_defined)
+ TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ else
+ TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElseIf
+/// ::= .elseif expression
+bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+ " an .elseif");
+ TheCondState.TheCond = AsmCond::ElseIfCond;
+
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet) {
+ TheCondState.Ignore = true;
+ EatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (ParseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.elseif' directive");
+
+ Lex();
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElse
+/// ::= .else
+bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.else' directive");
+
+ Lex();
+
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
+ TheCondState.TheCond = AsmCond::ElseCond;
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet)
+ TheCondState.Ignore = true;
+ else
+ TheCondState.Ignore = false;
+
+ return false;
+}
+
+/// ParseDirectiveEndIf
+/// ::= .endif
+bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.endif' directive");
+
+ Lex();
+
+ if ((TheCondState.TheCond == AsmCond::NoCond) ||
+ TheCondStack.empty())
+ Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
+ ".else");
+ if (!TheCondStack.empty()) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ return false;
+}
+
+/// ParseDirectiveFile
+/// ::= .file [number] string
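+///
+/// For example (filenames are illustrative):
+///   .file "input.s"
+///   .file 1 "input.c"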
+bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
+ // FIXME: I'm not sure what this is.
+ int64_t FileNumber = -1;
+ SMLoc FileNumberLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Integer)) {
+ FileNumber = getTok().getIntVal();
+ Lex();
+
+ if (FileNumber < 1)
+ return TokError("file number less than one");
+ }
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.file' directive");
+
+ StringRef Filename = getTok().getString();
+ Filename = Filename.substr(1, Filename.size()-2);
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.file' directive");
+
+ if (FileNumber == -1)
+ getStreamer().EmitFileDirective(Filename);
+ else {
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+ Error(FileNumberLoc, "file number already allocated");
+ }
+
+ return false;
+}
+
+/// ParseDirectiveLine
+/// ::= .line [number]
+bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.line' directive");
+
+ int64_t LineNumber = getTok().getIntVal();
+ (void) LineNumber;
+ Lex();
+
+ // FIXME: Do something with the .line.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.line' directive");
+
+ return false;
+}
+
+
+/// ParseDirectiveLoc
+/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is a file number that must have been previously assigned
+/// with a '.file' directive; the second number is the line number; the
+/// optional third number is a column position (zero if not specified). The
+/// remaining optional items are '.loc' sub-directives.
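+///
+/// For example (assuming file number 1 was assigned by a prior '.file'):
+///   .loc 1 42 0 is_stmt 1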
+bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("unexpected token in '.loc' directive");
+ int64_t FileNumber = getTok().getIntVal();
+ if (FileNumber < 1)
+ return TokError("file number less than one in '.loc' directive");
+ if (!getContext().isValidDwarfFileNumber(FileNumber))
+ return TokError("unassigned file number in '.loc' directive");
+ Lex();
+
+ int64_t LineNumber = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ LineNumber = getTok().getIntVal();
+ if (LineNumber < 1)
+ return TokError("line number less than one in '.loc' directive");
+ Lex();
+ }
+
+ int64_t ColumnPos = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ ColumnPos = getTok().getIntVal();
+ if (ColumnPos < 0)
+ return TokError("column position less than zero in '.loc' directive");
+ Lex();
+ }
+
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ int64_t Discriminator = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (getParser().ParseIdentifier(Name))
+ return TokError("unexpected token in '.loc' directive");
+
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
+ }
+ else {
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+ }
+ }
+ else if (Name == "isa") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ }
+ else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ }
+ else if (Name == "discriminator") {
+ if (getParser().ParseAbsoluteExpression(Discriminator))
+ return true;
+ }
+ else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ }
+ }
+
+ getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+ Isa, Discriminator, StringRef());
+
+ return false;
+}
+
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ return TokError("unsupported directive '" + Directive + "'");
+}
+
+/// ParseDirectiveCFISections
+/// ::= .cfi_sections section [, section]
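+///
+/// For example:
+///   .cfi_sections .eh_frame, .debug_frame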
+bool GenericAsmParser::ParseDirectiveCFISections(StringRef,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ bool EH = false;
+ bool Debug = false;
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("Expected an identifier");
+
+ if (Name == ".eh_frame")
+ EH = true;
+ else if (Name == ".debug_frame")
+ Debug = true;
+ }
+
+ getStreamer().EmitCFISections(EH, Debug);
+
+ return false;
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIStartProc();
+ return false;
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIEndProc();
+ return false;
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
+bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+ SMLoc DirectiveLoc) {
+ unsigned RegNo;
+
+ if (getLexer().isNot(AsmToken::Integer)) {
+ if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
+ DirectiveLoc))
+ return true;
+ Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true);
+ } else
+ return getParser().ParseAbsoluteExpression(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfa
+/// ::= .cfi_def_cfa register, offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfa(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaOffset
+/// ::= .cfi_def_cfa_offset offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIDefCfaOffset(Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIAdjustCfaOffset
+/// ::= .cfi_adjust_cfa_offset adjustment
+bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Adjustment = 0;
+ if (getParser().ParseAbsoluteExpression(Adjustment))
+ return true;
+
+ getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfaRegister
+/// ::= .cfi_def_cfa_register register
+bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIDefCfaRegister(Register);
+ return false;
+}
+
+/// ParseDirectiveCFIOffset
+/// ::= .cfi_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ int64_t Offset = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIOffset(Register, Offset);
+ return false;
+}
+
+/// ParseDirectiveCFIRelOffset
+/// ::= .cfi_rel_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ getStreamer().EmitCFIRelOffset(Register, Offset);
+ return false;
+}
+
+static bool isValidEncoding(int64_t Encoding) {
+ if (Encoding & ~0xff)
+ return false;
+
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return true;
+
+ const unsigned Format = Encoding & 0xf;
+ if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+ Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+ Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+ Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+ return false;
+
+ const unsigned Application = Encoding & 0x70;
+ if (Application != dwarf::DW_EH_PE_absptr &&
+ Application != dwarf::DW_EH_PE_pcrel)
+ return false;
+
+ return true;
+}
+
+/// ParseDirectiveCFIPersonalityOrLsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
+bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Encoding = 0;
+ if (getParser().ParseAbsoluteExpression(Encoding))
+ return true;
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return false;
+
+ if (!isValidEncoding(Encoding))
+ return TokError("unsupported encoding.");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (IDVal == ".cfi_personality")
+ getStreamer().EmitCFIPersonality(Sym, Encoding);
+ else {
+ assert(IDVal == ".cfi_lsda");
+ getStreamer().EmitCFILsda(Sym, Encoding);
+ }
+ return false;
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIRememberState();
+ return false;
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_restore_state
+bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ getStreamer().EmitCFIRestoreState();
+ return false;
+}
+
+/// ParseDirectiveCFISameValue
+/// ::= .cfi_same_value register
+bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFISameValue(Register);
+
+ return false;
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getParser().MacrosEnabled = Directive == ".macros_on";
+
+ return false;
+}
+
+/// ParseDirectiveMacro
+/// ::= .macro name [parameters]
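+///
+/// For example (macro and parameter names are illustrative):
+///   .macro reserve count
+///     .fill \count, 1, 0
+///   .endm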
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ std::vector<StringRef> Parameters;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for(;;) {
+ StringRef Parameter;
+ if (getParser().ParseIdentifier(Parameter))
+ return TokError("expected identifier in directive");
+ Parameters.push_back(Parameter);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ break;
+ Lex();
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.macro' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ AsmToken EndToken, StartToken = getTok();
+
+ // Lex the macro definition.
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof))
+ return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+ // Otherwise, check whether we have reached the '.endmacro'.
+ if (getLexer().is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".endm" ||
+ getTok().getIdentifier() == ".endmacro")) {
+ EndToken = getTok();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + EndToken.getIdentifier() +
+ "' directive");
+ break;
+ }
+
+ // Otherwise, scan to the end of the statement.
+ getParser().EatToEndOfStatement();
+ }
+
+ if (getParser().MacroMap.lookup(Name)) {
+ return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+ getParser().MacroMap[Name] = new Macro(Name, Body, Parameters);
+ return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ // If we are inside a macro instantiation, terminate the current
+ // instantiation.
+ if (!getParser().ActiveMacros.empty()) {
+ getParser().HandleMacroExit();
+ return false;
+ }
+
+ // Otherwise, this .endmacro is a stray entry in the file; well formed
+ // .endmacro directives are handled during the macro definition parsing.
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+}
+
+bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+ getParser().CheckForValidSection();
+
+ const MCExpr *Value;
+
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (DirName[1] == 's')
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
+
+ return false;
+}
+
+
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
+ MCContext &C, MCStreamer &Out,
+ const MCAsmInfo &MAI) {
+ return new AsmParser(T, SM, C, Out, MAI);
+}
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
new file mode 100644
index 000000000000..66ad384c7db2
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -0,0 +1,442 @@
+//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/COFF.h"
+using namespace llvm;
+
+namespace {
+
+class COFFAsmParser : public MCAsmParserExtension {
+ template<bool (COFFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<COFFAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind);
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+
+ // Win64 EH directives.
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
+ ".seh_proc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
+ ".seh_endproc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
+ ".seh_startchained");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
+ ".seh_endchained");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>(
+ ".seh_handler");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
+ ".seh_handlerdata");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
+ ".seh_pushreg");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
+ ".seh_setframe");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
+ ".seh_stackalloc");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
+ ".seh_savereg");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
+ ".seh_savexmm");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
+ ".seh_pushframe");
+ AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
+ ".seh_endprologue");
+ }
+
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ }
+
+ bool ParseDirectiveDef(StringRef, SMLoc);
+ bool ParseDirectiveScl(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveEndef(StringRef, SMLoc);
+
+ // Win64 EH directives.
+ bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProc(StringRef, SMLoc);
+ bool ParseSEHDirectiveStartChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndChained(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandler(StringRef, SMLoc);
+ bool ParseSEHDirectiveHandlerData(StringRef, SMLoc);
+ bool ParseSEHDirectivePushReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSetFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveAllocStack(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveReg(StringRef, SMLoc);
+ bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc);
+ bool ParseSEHDirectivePushFrame(StringRef, SMLoc);
+ bool ParseSEHDirectiveEndProlog(StringRef, SMLoc);
+
+ bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
+ bool ParseSEHRegisterNumber(unsigned &RegNo);
+public:
+ COFFAsmParser() {}
+};
+
+} // end anonymous namespace.
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getCOFFSection(
+ Section, Characteristics, Kind));
+
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+ StringRef SymbolName;
+
+ if (getParser().ParseIdentifier(SymbolName))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getStreamer().BeginCOFFSymbolDef(Sym);
+
+ Lex();
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+ int64_t SymbolStorageClass;
+ if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ int64_t Type;
+ if (getParser().ParseAbsoluteExpression(Type))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolType(Type);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EndCOFFSymbolDef();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHStartProc(Symbol);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProc();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHStartChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndChained();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify one or both of @unwind or @except");
+ Lex();
+ bool unwind = false, except = false;
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAtUnwindOrAtExcept(unwind, except))
+ return true;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitWin64EHHandler(handler, unwind, except);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHHandlerData();
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushReg(Reg);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify a stack pointer offset");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHSetFrame(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) {
+ int64_t Size;
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ if (Size & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHAllocStack(Size);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (Off & 7)
+ return Error(startLoc, "size is not a multiple of 8");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ // FIXME: Err on %xmm* registers
+ getStreamer().EmitWin64EHSaveReg(Reg, Off);
+ return false;
+}
+
+// FIXME: This method is inherently x86-specific. It should really be in the
+// x86 backend.
+bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) {
+ unsigned Reg;
+ int64_t Off;
+ if (ParseSEHRegisterNumber(Reg))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("you must specify an offset on the stack");
+
+ Lex();
+ SMLoc startLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Off))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (Off & 0x0F)
+ return Error(startLoc, "offset is not a multiple of 16");
+
+ Lex();
+ // FIXME: Err on non-%xmm* registers
+ getStreamer().EmitWin64EHSaveXMM(Reg, Off);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) {
+ bool Code = false;
+ StringRef CodeID;
+ if (getLexer().is(AsmToken::At)) {
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (!getParser().ParseIdentifier(CodeID)) {
+ if (CodeID != "code")
+ return Error(startLoc, "expected @code");
+ Code = true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitWin64EHPushFrame(Code);
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EmitWin64EHEndProlog();
+ return false;
+}
+
+bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
+ StringRef identifier;
+ if (getLexer().isNot(AsmToken::At))
+ return TokError("a handler attribute must begin with '@'");
+ SMLoc startLoc = getLexer().getLoc();
+ Lex();
+ if (getParser().ParseIdentifier(identifier))
+ return Error(startLoc, "expected @unwind or @except");
+ if (identifier == "unwind")
+ unwind = true;
+ else if (identifier == "except")
+ except = true;
+ else
+ return Error(startLoc, "expected @unwind or @except");
+ return false;
+}
+
+bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
+ SMLoc startLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Percent)) {
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ SMLoc endLoc;
+ unsigned LLVMRegNo;
+ if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
+ return true;
+
+ // Check that this is a non-volatile register.
+ const unsigned *NVRegs = TAI.getCalleeSavedRegs();
+ unsigned i;
+ for (i = 0; NVRegs[i] != 0; ++i)
+ if (NVRegs[i] == LLVMRegNo)
+ break;
+ if (NVRegs[i] == 0)
+ return Error(startLoc, "expected non-volatile register");
+
+ int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo);
+ if (SEHRegNo < 0)
+ return Error(startLoc,"register can't be represented in SEH unwind info");
+ RegNo = SEHRegNo;
+ }
+ else {
+ int64_t n;
+ if (getParser().ParseAbsoluteExpression(n))
+ return true;
+ if (n > 15)
+ return Error(startLoc, "register number is too high");
+ RegNo = n;
+ }
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+ return new COFFAsmParser;
+}
+
+}
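
The SEH directive handlers above validate stack sizes and offsets with bit masks ("Size & 7", "Off & 0x0F"). A minimal standalone sketch, outside this change and using a hypothetical helper name, of why masking with N-1 tests divisibility by a power of two N:

    // Standalone sketch (not part of this change); isMultipleOf is a
    // hypothetical helper, not an LLVM API.  For a power of two N, x is a
    // multiple of N exactly when its low log2(N) bits are zero, i.e.
    // (x & (N - 1)) == 0 -- the test behind "Size & 7" and "Off & 0x0F" above.
    #include <cassert>
    #include <cstdint>

    static bool isMultipleOf(uint64_t X, uint64_t PowerOfTwo) {
      assert(PowerOfTwo && (PowerOfTwo & (PowerOfTwo - 1)) == 0 &&
             "divisor must be a power of two");
      return (X & (PowerOfTwo - 1)) == 0;
    }

    int main() {
      assert(isMultipleOf(24, 8));    // 24 & 7 == 0
      assert(!isMultipleOf(20, 8));   // 20 & 7 == 4
      assert(isMultipleOf(32, 16));   // 32 & 0x0F == 0
      assert(!isMultipleOf(24, 16));  // 24 & 0x0F == 8
      return 0;
    }

The mask form keeps the error paths above cheap: no division or modulo is needed to reject a misaligned operand.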
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
new file mode 100644
index 000000000000..6f450682cbeb
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -0,0 +1,668 @@
+//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+namespace {
+
+/// \brief Implementation of directive handling which is shared across all
+/// Darwin targets.
+class DarwinAsmParser : public MCAsmParserExtension {
+ template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<DarwinAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(const char *Segment, const char *Section,
+ unsigned TAA = 0, unsigned ImplicitAlign = 0,
+ unsigned StubSize = 0);
+
+public:
+ DarwinAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ ".subsections_via_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ ".secure_log_unique");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ ".secure_log_reset");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
+
+ // Special section directives.
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
+
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
+ }
+
+ bool ParseDirectiveDesc(StringRef, SMLoc);
+ bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
+ bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
+ bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
+ bool ParseDirectiveTBSS(StringRef, SMLoc);
+ bool ParseDirectiveZerofill(StringRef, SMLoc);
+
+ // Named Section Directive
+ bool ParseSectionDirectiveConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__const");
+ }
+ bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__static_const");
+ }
+ bool ParseSectionDirectiveCString(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS, 4);
+ }
+ bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS, 8);
+ }
+ bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS, 16);
+ }
+ bool ParseSectionDirectiveConstructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__constructor");
+ }
+ bool ParseSectionDirectiveDestructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__destructor");
+ }
+ bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init0");
+ }
+ bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init1");
+ }
+ bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__symbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ // FIXME: Different on PPC and ARM.
+ 0, 16);
+ }
+ bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__picsymbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26);
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__data");
+ }
+ bool ParseSectionDirectiveStaticData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__static_data");
+ }
+ bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveDyld(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__dyld");
+ }
+ bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveConstData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__const");
+ }
+ bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__meta_class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__protocol",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__string_object",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__message_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__symbols",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__category",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__instance_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__module_info",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__selector_strs",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ }
+ bool ParseSectionDirectiveTLV(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+ }
+ bool ParseSectionDirectiveIdent(StringRef, SMLoc) {
+ // Darwin silently ignores the .ident directive.
+ getParser().EatToEndOfStatement();
+ return false;
+ }
+ bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+ }
+
+};
+
+}
+
+bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
+ const char *Section,
+ unsigned TAA, unsigned Align,
+ unsigned StubSize) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ // FIXME: Arch specific.
+ bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+
+ // Set the implicit alignment, if any.
+ //
+ // FIXME: This isn't really what 'as' does; I think it just uses the implicit
+ // alignment on the section (e.g., if one manually inserts bytes into the
+ // section, then just issuing the section switch directive will not realign
+ // the section). However, this is arguably more reasonable behavior, and there
+ // is no good reason for someone to intentionally emit incorrectly sized
+ // values into the implicitly aligned sections.
+ if (Align)
+ getStreamer().EmitValueToAlignment(Align, 0, 1, 0);
+
+ return false;
+}
+
+/// ParseDirectiveDesc
+/// ::= .desc identifier , expression
+bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.desc' directive");
+ Lex();
+
+ int64_t DescValue;
+ if (getParser().ParseAbsoluteExpression(DescValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.desc' directive");
+
+ Lex();
+
+ // Set the n_desc field of this Symbol to this DescValue
+ getStreamer().EmitSymbolDesc(Sym, DescValue);
+
+ return false;
+}
+
+/// ParseDirectiveDumpOrLoad
+/// ::= ( .dump | .load ) "filename"
+bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
+ SMLoc IDLoc) {
+ bool IsDump = Directive == ".dump";
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.dump' or '.load' directive");
+
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.dump' or '.load' directive");
+
+ Lex();
+
+ // FIXME: If/when .dump and .load are implemented they will be done in the
+ // assembly parser and not have any need for an MCStreamer API.
+ if (IsDump)
+ return Warning(IDLoc, "ignoring directive .dump for now");
+ else
+ return Warning(IDLoc, "ignoring directive .load for now");
+}
+
+/// ParseDirectiveLsym
+/// ::= .lsym identifier , expression
+bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.lsym' directive");
+ Lex();
+
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.lsym' directive");
+
+ Lex();
+
+ // We don't currently support this directive.
+ //
+ // FIXME: Diagnostic location!
+ (void) Sym;
+ return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveSection:
+/// ::= .section identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef SectionName;
+ if (getParser().ParseIdentifier(SectionName))
+ return Error(Loc, "expected identifier after '.section' directive");
+
+ // Verify there is a following comma.
+ if (!getLexer().is(AsmToken::Comma))
+ return TokError("unexpected token in '.section' directive");
+
+ std::string SectionSpec = SectionName;
+ SectionSpec += ",";
+
+ // Add all the tokens until the end of the line, ParseSectionSpecifier will
+ // handle this.
+ StringRef EOL = getLexer().LexUntilEndOfStatement();
+ SectionSpec.append(EOL.begin(), EOL.end());
+
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.section' directive");
+ Lex();
+
+
+ StringRef Segment, Section;
+ unsigned StubSize;
+ unsigned TAA;
+ bool TAAParsed;
+ std::string ErrorStr =
+ MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+ TAA, TAAParsed, StubSize);
+
+ if (!ErrorStr.empty())
+ return Error(Loc, ErrorStr.c_str());
+
+ // FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+ return false;
+}
+
+/// ParseDirectiveSecureLogUnique
+/// ::= .secure_log_unique ... message ...
+bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
+ StringRef LogMessage = getParser().ParseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_unique' directive");
+
+ if (getContext().getSecureLogUsed())
+ return Error(IDLoc, ".secure_log_unique specified multiple times");
+
+ // Get the secure log path.
+ const char *SecureLogFile = getContext().getSecureLogFile();
+ if (SecureLogFile == NULL)
+ return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
+ "environment variable unset.");
+
+ // Open the secure log file if we haven't already.
+ raw_ostream *OS = getContext().getSecureLog();
+ if (OS == NULL) {
+ std::string Err;
+ OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append);
+ if (!Err.empty()) {
+ delete OS;
+ return Error(IDLoc, Twine("can't open secure log file: ") +
+ SecureLogFile + " (" + Err + ")");
+ }
+ getContext().setSecureLog(OS);
+ }
+
+ // Write the message.
+ int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+ *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
+ << LogMessage + "\n";
+
+ getContext().setSecureLogUsed(true);
+
+ return false;
+}
+
+/// ParseDirectiveSecureLogReset
+/// ::= .secure_log_reset
+bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_reset' directive");
+
+ Lex();
+
+ getContext().setSecureLogUsed(false);
+
+ return false;
+}
+
+/// ParseDirectiveSubsectionsViaSymbols
+/// ::= .subsections_via_symbols
+bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+ Lex();
+
+ getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return false;
+}
+
+/// ParseDirectiveTBSS
+/// ::= .tbss identifier, size, align
+bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than"
+ "zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less"
+ "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ getStreamer().EmitTBSSSymbol(getContext().getMachOSection(
+ "__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+/// ParseDirectiveZerofill
+/// ::= .zerofill segname , sectname [, identifier , size_expression [
+/// , align_expression ]]
+bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
+ StringRef Segment;
+ if (getParser().ParseIdentifier(Segment))
+ return TokError("expected segment name after '.zerofill' directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Section;
+ if (getParser().ParseIdentifier(Section))
+ return TokError("expected section name after comma in '.zerofill' "
+ "directive");
+
+ // If this is the end of the line, all that was wanted was to create the
+ // section but with no symbol.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ // Create the zerofill section but no symbol
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()));
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef IDStr;
+ if (getParser().ParseIdentifier(IDStr))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zerofill' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+ "than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+ "can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the zerofill Symbol with Size and Pow2Alignment
+ //
+ // FIXME: Arch specific.
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createDarwinAsmParser() {
+ return new DarwinAsmParser;
+}
+
+}
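
Both the .tbss and .zerofill handlers above accept the alignment as a power-of-two exponent and hand the streamer 1 << Pow2Alignment bytes; the FIXMEs note that overflow of that shift is not yet diagnosed. A small standalone sketch, outside this change and with a hypothetical helper name, of the conversion and the overflow case:

    // Standalone sketch (not part of this change); pow2AlignToBytes is a
    // hypothetical helper, not an LLVM API.  It converts a power-of-two
    // exponent, as taken by .tbss/.zerofill, into a byte alignment and
    // reports the shift overflow the directives above leave undiagnosed.
    #include <cassert>
    #include <cstdint>

    static uint64_t pow2AlignToBytes(int64_t Pow2Alignment) {
      if (Pow2Alignment < 0 || Pow2Alignment >= 64)
        return 0;                         // signal a bad or overflowing exponent
      return uint64_t(1) << Pow2Alignment;
    }

    int main() {
      assert(pow2AlignToBytes(0) == 1);   // exponent 0 -> 1-byte alignment
      assert(pow2AlignToBytes(3) == 8);   // exponent 3 -> 8-byte alignment
      assert(pow2AlignToBytes(4) == 16);
      assert(pow2AlignToBytes(70) == 0);  // would overflow a 64-bit shift
      return 0;
    }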
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
new file mode 100644
index 000000000000..dcf689a6f0e7
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -0,0 +1,533 @@
+//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+namespace {
+
+class ELFAsmParser : public MCAsmParserExtension {
+ template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<ELFAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind);
+ bool SeenIdent;
+
+public:
+ ELFAsmParser() : SeenIdent(false) {
+ BracketExpressionsSupported = true;
+ }
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+ }
+
+ // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+ // the best way for us to get access to it?
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ }
+ bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+ }
+ bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+ }
+ bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+ }
+ bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+ }
+ bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+ return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSize(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveIdent(StringRef, SMLoc);
+ bool ParseDirectiveSymver(StringRef, SMLoc);
+ bool ParseDirectiveWeakref(StringRef, SMLoc);
+
+private:
+ bool ParseSectionName(StringRef &SectionName);
+};
+
+}
+
+bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getELFSection(
+ Section, Type, Flags, Kind));
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().EmitELFSize(Sym, Expr);
+ return false;
+}
+
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+ // A section name can contain -, so we cannot just use
+ // ParseIdentifier.
+ SMLoc FirstLoc = getLexer().getLoc();
+ unsigned Size = 0;
+
+ if (getLexer().is(AsmToken::String)) {
+ SectionName = getTok().getIdentifier();
+ Lex();
+ return false;
+ }
+
+ for (;;) {
+ StringRef Tmp;
+ unsigned CurSize;
+
+ SMLoc PrevLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Minus)) {
+ CurSize = 1;
+ Lex(); // Consume the "-".
+ } else if (getLexer().is(AsmToken::String)) {
+ CurSize = getTok().getIdentifier().size() + 2;
+ Lex();
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ CurSize = getTok().getIdentifier().size();
+ Lex();
+ } else {
+ break;
+ }
+
+ Size += CurSize;
+ SectionName = StringRef(FirstLoc.getPointer(), Size);
+
+ // Make sure the following token is adjacent.
+ if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer())
+ break;
+ }
+ if (Size == 0)
+ return true;
+
+ return false;
+}
+
+static SectionKind computeSectionKind(unsigned Flags) {
+ if (Flags & ELF::SHF_EXECINSTR)
+ return SectionKind::getText();
+ if (Flags & ELF::SHF_TLS)
+ return SectionKind::getThreadData();
+ return SectionKind::getDataRel();
+}
+
+static int parseSectionFlags(StringRef flagsStr) {
+ int flags = 0;
+
+ for (unsigned i = 0; i < flagsStr.size(); i++) {
+ switch (flagsStr[i]) {
+ case 'a':
+ flags |= ELF::SHF_ALLOC;
+ break;
+ case 'x':
+ flags |= ELF::SHF_EXECINSTR;
+ break;
+ case 'w':
+ flags |= ELF::SHF_WRITE;
+ break;
+ case 'M':
+ flags |= ELF::SHF_MERGE;
+ break;
+ case 'S':
+ flags |= ELF::SHF_STRINGS;
+ break;
+ case 'T':
+ flags |= ELF::SHF_TLS;
+ break;
+ case 'c':
+ flags |= ELF::XCORE_SHF_CP_SECTION;
+ break;
+ case 'd':
+ flags |= ELF::XCORE_SHF_DP_SECTION;
+ break;
+ case 'G':
+ flags |= ELF::SHF_GROUP;
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ return flags;
+}
+
+bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(s, loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+
+ if (ParseSectionName(SectionName))
+ return TokError("expected identifier in directive");
+
+ StringRef TypeName;
+ int64_t Size = 0;
+ StringRef GroupName;
+ unsigned Flags = 0;
+
+ // Set the defaults first.
+ if (SectionName == ".fini" || SectionName == ".init" ||
+ SectionName == ".rodata")
+ Flags |= ELF::SHF_ALLOC;
+ if (SectionName == ".fini" || SectionName == ".init")
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ StringRef FlagsStr = getTok().getStringContents();
+ Lex();
+
+ int extraFlags = parseSectionFlags(FlagsStr);
+ if (extraFlags < 0)
+ return TokError("unknown flag");
+ Flags |= extraFlags;
+
+ bool Mergeable = Flags & ELF::SHF_MERGE;
+ bool Group = Flags & ELF::SHF_GROUP;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ if (Mergeable)
+ return TokError("Mergeable section must specify the type");
+ if (Group)
+ return TokError("Group section must specify the type");
+ } else {
+ Lex();
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+
+ Lex();
+ if (getParser().ParseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (Mergeable) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected the entry size");
+ Lex();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+ if (Size <= 0)
+ return TokError("entry size must be positive");
+ }
+
+ if (Group) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (getParser().ParseIdentifier(GroupName))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (getParser().ParseIdentifier(Linkage))
+ return true;
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
+ }
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Type = ELF::SHT_PROGBITS;
+
+ if (!TypeName.empty()) {
+ if (TypeName == "init_array")
+ Type = ELF::SHT_INIT_ARRAY;
+ else if (TypeName == "fini_array")
+ Type = ELF::SHT_FINI_ARRAY;
+ else if (TypeName == "preinit_array")
+ Type = ELF::SHT_PREINIT_ARRAY;
+ else if (TypeName == "nobits")
+ Type = ELF::SHT_NOBITS;
+ else if (TypeName == "progbits")
+ Type = ELF::SHT_PROGBITS;
+ else if (TypeName == "note")
+ Type = ELF::SHT_NOTE;
+ else if (TypeName == "unwind")
+ Type = ELF::SHT_X86_64_UNWIND;
+ else
+ return TokError("unknown section type");
+ }
+
+ SectionKind Kind = computeSectionKind(Flags);
+ getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
+ Flags, Kind, Size,
+ GroupName));
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
+/// ParseDirectiveELFType
+/// ::= .type identifier , @attribute
+bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.type' directive");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+ Lex();
+
+ StringRef Type;
+ SMLoc TypeLoc;
+
+ TypeLoc = getLexer().getLoc();
+ if (getParser().ParseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Default(MCSA_Invalid);
+
+ if (Attr == MCSA_Invalid)
+ return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.type' directive");
+
+ Lex();
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ return false;
+}
+
+/// ParseDirectiveIdent
+/// ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.ident' directive");
+
+ StringRef Data = getTok().getIdentifier();
+
+ Lex();
+
+ const MCSection *Comment =
+ getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ SectionKind::getReadOnly(),
+ 1, "");
+
+ getStreamer().PushSection();
+ getStreamer().SwitchSection(Comment);
+ if (!SeenIdent) {
+ getStreamer().EmitIntValue(0, 1);
+ SeenIdent = true;
+ }
+ getStreamer().EmitBytes(Data, 0);
+ getStreamer().EmitIntValue(0, 1);
+ getStreamer().PopSection();
+ return false;
+}
+
+/// ParseDirectiveSymver
+/// ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (AliasName.find('@') == StringRef::npos)
+ return TokError("expected a '@' in the name");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+ getStreamer().EmitAssignment(Alias, Value);
+ return false;
+}
+
+/// ParseDirectiveWeakref
+/// ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+ // FIXME: Share code with the other alias building directives.
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitWeakReference(Alias, Sym);
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createELFAsmParser() {
+ return new ELFAsmParser;
+}
+
+}
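
ParseSectionName above stitches identifier, string, and '-' tokens back into a single section name and stops as soon as the next token is no longer adjacent, which it detects with plain pointer arithmetic into the source buffer (PrevLoc.getPointer() + CurSize versus the next token's pointer). A standalone sketch, outside this change and using a hypothetical helper, of that adjacency test:

    // Standalone sketch (not part of this change); tokensAdjacent is a
    // hypothetical helper, not an LLVM API.  Two tokens lexed from the same
    // buffer are adjacent exactly when the end of the first is the start of
    // the second -- the condition ParseSectionName uses to keep appending.
    #include <cassert>
    #include <cstddef>

    static bool tokensAdjacent(const char *PrevStart, size_t PrevSize,
                               const char *NextStart) {
      return PrevStart + PrevSize == NextStart;
    }

    int main() {
      const char Buf[] = "foo-bar baz";
      // "foo" is Buf[0..2], "-" is Buf[3], "bar" is Buf[4..6], "baz" starts at Buf[8].
      assert(tokensAdjacent(Buf, 3, Buf + 3));      // "foo" then "-": keep going
      assert(tokensAdjacent(Buf + 3, 1, Buf + 4));  // "-" then "bar": keep going
      assert(!tokensAdjacent(Buf + 4, 3, Buf + 8)); // a space intervenes: stop
      return 0;
    }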
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
new file mode 100644
index 000000000000..dceece78ba10
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -0,0 +1,27 @@
+//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
+}
+
+MCAsmLexer::~MCAsmLexer() {
+}
+
+SMLoc MCAsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+SMLoc AsmToken::getLoc() const {
+ return SMLoc::getFromPointer(Str.data());
+}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
new file mode 100644
index 000000000000..4030e41036aa
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -0,0 +1,48 @@
+//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
+}
+
+MCAsmParser::~MCAsmParser() {
+}
+
+void MCAsmParser::setTargetParser(TargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
+}
+
+const AsmToken &MCAsmParser::getTok() {
+ return getLexer().getTok();
+}
+
+bool MCAsmParser::TokError(const Twine &Msg) {
+ Error(getLexer().getLoc(), Msg);
+ return true;
+}
+
+bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
+ SMLoc L;
+ return ParseExpression(Res, L);
+}
+
+void MCParsedAsmOperand::dump() const {
+ dbgs() << " " << *this;
+}
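
Every handler in these parser files follows the convention that MCAsmParser::TokError encodes above: report the diagnostic, then return true, so true means failure, false means success, and callers can bail out with early returns. A minimal standalone sketch, outside this change and with hypothetical names, of that convention:

    // Standalone sketch (not part of this change); tokError and parseDirective
    // are hypothetical stand-ins, not LLVM APIs.  They mirror the "return true
    // on error" convention used by TokError and the directive handlers above.
    #include <cstdio>
    #include <string>

    static bool tokError(const std::string &Msg) {
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
      return true;                        // true == error, as with TokError
    }

    static bool parseDirective(bool HaveComma) {
      if (!HaveComma)
        return tokError("unexpected token in directive");
      return false;                       // false == success
    }

    int main() {
      return parseDirective(true) ? 1 : 0; // exits 0: the "directive" parsed
    }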
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
new file mode 100644
index 000000000000..3f25a14926b6
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -0,0 +1,22 @@
+//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+using namespace llvm;
+
+MCAsmParserExtension::MCAsmParserExtension() :
+ BracketExpressionsSupported(false) {
+}
+
+MCAsmParserExtension::~MCAsmParserExtension() {
+}
+
+void MCAsmParserExtension::Initialize(MCAsmParser &Parser) {
+ this->Parser = &Parser;
+}
diff --git a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp
new file mode 100644
index 000000000000..512f6b044911
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp
@@ -0,0 +1,19 @@
+//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+TargetAsmParser::TargetAsmParser()
+ : AvailableFeatures(0)
+{
+}
+
+TargetAsmParser::~TargetAsmParser() {
+}
diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp
new file mode 100644
index 000000000000..6098e6b8f38b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp
@@ -0,0 +1,234 @@
+//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+// FIXME: Remove this.
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCPureStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+ virtual void Finish();
+
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EndCOFFSymbolDef() {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitFileDirective(StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ return false;
+ }
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCPureStreamer::InitSections() {
+ // FIXME: To what!?
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+
+}
+
+void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ Symbol->setSection(*getCurrentSection());
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // We have to create a new fragment if this is an atom defining symbol,
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ new MCDataFragment(getCurrentSectionData());
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ report_fatal_error("not yet implemented in pure streamer");
+}
+
+void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::Finish() {
+ // FIXME: Handle DWARF tables?
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE) {
+ return new MCPureStreamer(Context, TAB, OS, CE);
+}
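
EmitInstToData above encodes the instruction into a scratch buffer and appends it to the current data fragment; because the emitter records fixup offsets relative to the start of the instruction, each offset is rebased by the fragment's current size before the bytes are appended. A standalone sketch, outside this change and with hypothetical types, of that rebasing:

    // Standalone sketch (not part of this change); Fixup, Fragment and
    // appendInst are hypothetical stand-ins for MCFixup, MCDataFragment and
    // the loop in MCPureStreamer::EmitInstToData.
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct Fixup { uint64_t Offset; };    // position of the value to patch

    struct Fragment {
      std::string Contents;               // raw bytes emitted so far
      std::vector<Fixup> Fixups;          // offsets relative to Contents[0]
    };

    static void appendInst(Fragment &F, const std::string &Code,
                           std::vector<Fixup> Fixups) {
      uint64_t Base = F.Contents.size();  // where this instruction will land
      for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
        Fixups[i].Offset += Base;         // instruction-relative -> fragment-relative
        F.Fixups.push_back(Fixups[i]);
      }
      F.Contents.append(Code.begin(), Code.end());
    }

    int main() {
      Fragment F;
      appendInst(F, std::string(5, '\x90'), std::vector<Fixup>(1, Fixup{1}));
      appendInst(F, std::string(3, '\x90'), std::vector<Fixup>(1, Fixup{2}));
      assert(F.Fixups[0].Offset == 1);      // first instruction starts at offset 0
      assert(F.Fixups[1].Offset == 5 + 2);  // second starts after 5 bytes
      return 0;
    }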
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
new file mode 100644
index 000000000000..a792d5631790
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -0,0 +1,22 @@
+//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MCSection
+//===----------------------------------------------------------------------===//
+
+MCSection::~MCSection() {
+}
+
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 000000000000..90091f06e9ac
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ // Standard sections don't require the '.section' directive.
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ else
+ OS << 'r';
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
+ OS << 'n';
+ OS << "\"\n";
+
+ if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ switch (Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "\t.linkonce one_only\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
+ OS << "\t.linkonce discard\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "\t.linkonce same_size\n";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "\t.linkonce same_contents\n";
+ break;
+ // NOTE: as of binutils 2.20, there is no way to specify 'select largest'
+ // with the .linkonce directive. For now, we treat it as an invalid
+ // comdat selection value.
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
+ // OS << "\t.linkonce largest\n";
+ // break;
+ default:
+ assert (0 && "unsupported COFF selection type");
+ break;
+ }
+ }
+}
+
+bool MCSectionCOFF::UseCodeAlign() const {
+ return getKind().isText();
+}
+
+bool MCSectionCOFF::isVirtualSection() const {
+ return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+}
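A hedged sketch of the characteristics-to-flag-letter mapping that MCSectionCOFF::PrintSwitchToSection implements above; the bit constants below are illustrative stand-ins, not the real COFF::IMAGE_SCN_* values.

    #include <string>

    static const unsigned kIsText        = 1u << 0;  // stand-in for "is text"
    static const unsigned kIsWriteable   = 1u << 1;  // stand-in for "is writeable"
    static const unsigned kIsDiscardable = 1u << 2;  // stand-in for IMAGE_SCN_MEM_DISCARDABLE

    // 'x' for code, 'w' or 'r' for writeable vs. read-only, 'n' if discardable,
    // mirroring the string emitted between the quotes of the .section directive.
    static std::string coffSectionFlags(unsigned Bits) {
      std::string S;
      if (Bits & kIsText)        S += 'x';
      S += (Bits & kIsWriteable) ? 'w' : 'r';
      if (Bits & kIsDiscardable) S += 'n';
      return S;
    }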
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
new file mode 100644
index 000000000000..dfd77c3fe813
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -0,0 +1,150 @@
+//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionELF::~MCSectionELF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether the '.section' directive
+// should be omitted before the section name.
+bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" ||
+ (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS()))
+ return true;
+
+ return false;
+}
+
+void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ StringRef name = getSectionName();
+ if (name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) {
+ OS << "\t.section\t" << name;
+ } else {
+ OS << "\t.section\t\"";
+ for (const char *b = name.begin(), *e = name.end(); b < e; ++b) {
+ if (*b == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*b != '\\') // Neither quote nor backslash
+ OS << *b;
+ else if (b + 1 == e) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << b[0] << b[1]; // Quoted character
+ ++b;
+ }
+ }
+ OS << '"';
+ }
+
+ // Handle the weird solaris syntax if desired.
+ if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
+ !(Flags & ELF::SHF_MERGE)) {
+ if (Flags & ELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Flags & ELF::SHF_WRITE)
+ OS << ",#write";
+ if (Flags & ELF::SHF_TLS)
+ OS << ",#tls";
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Flags & ELF::SHF_ALLOC)
+ OS << 'a';
+ if (Flags & ELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Flags & ELF::SHF_WRITE)
+ OS << 'w';
+ if (Flags & ELF::SHF_MERGE)
+ OS << 'M';
+ if (Flags & ELF::SHF_STRINGS)
+ OS << 'S';
+ if (Flags & ELF::SHF_TLS)
+ OS << 'T';
+
+ // If there are target-specific flags, print them.
+ if (Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+
+ OS << '"';
+
+ OS << ',';
+
+ // If the comment string is '@' (e.g. on ARM), use '%' instead.
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (EntrySize) {
+ assert(Flags & ELF::SHF_MERGE);
+ OS << "," << EntrySize;
+ }
+
+ if (Flags & ELF::SHF_GROUP)
+ OS << "," << Group->getName() << ",comdat";
+ OS << '\n';
+}
+
+bool MCSectionELF::UseCodeAlign() const {
+ return getFlags() & ELF::SHF_EXECINSTR;
+}
+
+bool MCSectionELF::isVirtualSection() const {
+ return getType() == ELF::SHT_NOBITS;
+}
+
+unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString()) return 1;
+ if (Kind.isMergeable2ByteCString()) return 2;
+ if (Kind.isMergeable4ByteCString()) return 4;
+ if (Kind.isMergeableConst4()) return 4;
+ if (Kind.isMergeableConst8()) return 8;
+ if (Kind.isMergeableConst16()) return 16;
+ return 0;
+}
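The name-quoting rule MCSectionELF::PrintSwitchToSection applies above, restated as a standalone helper over plain std::string (no MC types): names made only of [0-9A-Za-z_.] print bare, anything else is quoted, with embedded quotes escaped and a trailing backslash doubled.

    #include <cctype>
    #include <cstddef>
    #include <string>

    static std::string quoteSectionName(const std::string &Name) {
      bool Simple = true;
      for (std::size_t I = 0; I != Name.size(); ++I) {
        char C = Name[I];
        if (!std::isalnum((unsigned char)C) && C != '_' && C != '.')
          Simple = false;
      }
      if (Simple)
        return Name;

      std::string Out = "\"";
      for (std::size_t I = 0; I != Name.size(); ++I) {
        char C = Name[I];
        if (C == '"')                        // escape an embedded quote
          Out += "\\\"";
        else if (C != '\\')                  // ordinary character
          Out += C;
        else if (I + 1 == Name.size())       // trailing backslash is doubled
          Out += "\\\\";
        else {                               // backslash pair passed through
          Out += C;
          Out += Name[++I];
        }
      }
      return Out + '"';
    }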
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
new file mode 100644
index 000000000000..e771556262a8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -0,0 +1,303 @@
+//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+/// SectionTypeDescriptors - These are strings that describe the various
+/// section types.  This table *must* be kept in the same order as, and stay
+/// synchronized with, the section type list.
+static const struct {
+ const char *AssemblerName, *EnumName;
+} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = {
+ { "regular", "S_REGULAR" }, // 0x00
+ { 0, "S_ZEROFILL" }, // 0x01
+ { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
+ { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
+ { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
+ { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05
+ { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06
+ { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07
+ { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08
+ { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
+ { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
+ { "coalesced", "S_COALESCED" }, // 0x0B
+ { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { "interposing", "S_INTERPOSING" }, // 0x0D
+ { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
+ { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
+ { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
+ { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
+ { "thread_local_variable_pointers",
+ "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
+ { "thread_local_init_function_pointers",
+ "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
+};
+
+
+/// SectionAttrDescriptors - This is an array of descriptors for section
+/// attributes.  Unlike the SectionTypeDescriptors, this is not directly
+/// indexed by attribute; instead, it is searched.  The last entry has an
+/// AttrFlagEnd AttrFlag value.
+static const struct {
+ unsigned AttrFlag;
+ const char *AssemblerName, *EnumName;
+} SectionAttrDescriptors[] = {
+#define ENTRY(ASMNAME, ENUM) \
+ { MCSectionMachO::ENUM, ASMNAME, #ENUM },
+ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS)
+ENTRY("no_toc", S_ATTR_NO_TOC)
+ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS)
+ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
+ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
+ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
+ENTRY("debug", S_ATTR_DEBUG)
+ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
+#undef ENTRY
+ { 0, "none", 0 }, // used if section has no attributes but has a stub size
+#define AttrFlagEnd 0xffffffff // not a legal value; multiple attribute bits set
+ { AttrFlagEnd, 0, 0 }
+};
+
+MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
+ unsigned TAA, unsigned reserved2, SectionKind K)
+ : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ assert(Segment.size() <= 16 && Section.size() <= 16 &&
+ "Segment or section string too long");
+ for (unsigned i = 0; i != 16; ++i) {
+ if (i < Segment.size())
+ SegmentName[i] = Segment[i];
+ else
+ SegmentName[i] = 0;
+
+ if (i < Section.size())
+ SectionName[i] = Section[i];
+ else
+ SectionName[i] = 0;
+ }
+}
+
+void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+ OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
+
+ // Get the section type and attributes.
+ unsigned TAA = getTypeAndAttributes();
+ if (TAA == 0) {
+ OS << '\n';
+ return;
+ }
+
+ unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
+ assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
+ "Invalid SectionType specified!");
+
+ if (SectionTypeDescriptors[SectionType].AssemblerName) {
+ OS << ',';
+ OS << SectionTypeDescriptors[SectionType].AssemblerName;
+ } else {
+ // If we have no assembler name for this section type, stop here.
+ OS << '\n';
+ return;
+ }
+
+ // If we don't have any attributes, we're done.
+ unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
+ if (SectionAttrs == 0) {
+ // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
+ // the attribute specifier.
+ if (Reserved2 != 0)
+ OS << ",none," << Reserved2;
+ OS << '\n';
+ return;
+ }
+
+ // Check each attribute to see if we have it.
+ char Separator = ',';
+ for (unsigned i = 0;
+ SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag;
+ ++i) {
+ // Check to see if we have this attribute.
+ if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+ continue;
+
+ // Yep, clear it and print it.
+ SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+ OS << Separator;
+ if (SectionAttrDescriptors[i].AssemblerName)
+ OS << SectionAttrDescriptors[i].AssemblerName;
+ else
+ OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+ Separator = '+';
+ }
+
+ assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+ // If we have a S_SYMBOL_STUBS size specified, print it.
+ if (Reserved2 != 0)
+ OS << ',' << Reserved2;
+ OS << '\n';
+}
+
+bool MCSectionMachO::UseCodeAlign() const {
+ return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+ return (getType() == MCSectionMachO::S_ZEROFILL ||
+ getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+ while (!Str.empty() && isspace(Str[0]))
+ Str = Str.substr(1);
+ while (!Str.empty() && isspace(Str.back()))
+ Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file. If successful, this fills in the specified Out
+/// parameters and returns an empty string. When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
+ StringRef &Segment, // Out.
+ StringRef &Section, // Out.
+ unsigned &TAA, // Out.
+ bool &TAAParsed, // Out.
+ unsigned &StubSize) { // Out.
+ TAAParsed = false;
+ // Find the first comma.
+ std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+ // If there is no comma, we fail.
+ if (Comma.second.empty())
+ return "mach-o section specifier requires a segment and section "
+ "separated by a comma";
+
+ // Capture segment, remove leading and trailing whitespace.
+ Segment = Comma.first;
+ StripSpaces(Segment);
+
+ // Verify that the segment is present and not too long.
+ if (Segment.empty() || Segment.size() > 16)
+ return "mach-o section specifier requires a segment whose length is "
+ "between 1 and 16 characters";
+
+ // Split the section name off from any attributes if present.
+ Comma = Comma.second.split(',');
+
+ // Capture section, remove leading and trailing whitespace.
+ Section = Comma.first;
+ StripSpaces(Section);
+
+ // Verify that the section is present and not too long.
+ if (Section.empty() || Section.size() > 16)
+ return "mach-o section specifier requires a section whose length is "
+ "between 1 and 16 characters";
+
+ // If there is no comma after the section, we're done.
+ TAA = 0;
+ StubSize = 0;
+ if (Comma.second.empty())
+ return "";
+
+ // Otherwise, we need to parse the section type and attributes.
+ Comma = Comma.second.split(',');
+
+ // Get the section type.
+ StringRef SectionType = Comma.first;
+ StripSpaces(SectionType);
+
+ // Figure out which section type it is.
+ unsigned TypeID;
+ for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+ if (SectionTypeDescriptors[TypeID].AssemblerName &&
+ SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+ break;
+
+ // If we didn't find the section type, reject it.
+ if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+ return "mach-o section specifier uses an unknown section type";
+
+ // Remember the TypeID.
+ TAA = TypeID;
+ TAAParsed = true;
+
+ // If we have no comma after the section type, there are no attributes.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // Otherwise, we do have some attributes. Split off the size specifier if
+ // present.
+ Comma = Comma.second.split(',');
+ StringRef Attrs = Comma.first;
+
+ // The attribute list is a '+' separated list of attributes.
+ std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+ while (1) {
+ StringRef Attr = Plus.first;
+ StripSpaces(Attr);
+
+ // Look up the attribute.
+ for (unsigned i = 0; ; ++i) {
+ if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+ return "mach-o section specifier has invalid attribute";
+
+ if (SectionAttrDescriptors[i].AssemblerName &&
+ Attr == SectionAttrDescriptors[i].AssemblerName) {
+ TAA |= SectionAttrDescriptors[i].AttrFlag;
+ break;
+ }
+ }
+
+ if (Plus.second.empty()) break;
+ Plus = Plus.second.split('+');
+ }
+
+ // Okay, we've parsed the section attributes; see if we have a stub size spec.
+ if (Comma.second.empty()) {
+ // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+ if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier of type 'symbol_stubs' requires a size "
+ "specifier";
+ return "";
+ }
+
+ // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS.
+ if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+ return "mach-o section specifier cannot have a stub size specified because "
+ "it does not have type 'symbol_stubs'";
+
+ // Okay, if we do, it must be a number.
+ StringRef StubSizeStr = Comma.second;
+ StripSpaces(StubSizeStr);
+
+ // Convert the stub size from a string to an integer.
+ if (StubSizeStr.getAsInteger(0, StubSize))
+ return "mach-o section specifier has a malformed stub size";
+
+ return "";
+}
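A usage sketch for the parser above, assuming ParseSectionSpecifier is the static member declared in llvm/MC/MCSectionMachO.h; the helper name parseSpec is made up for the example.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    // e.g. parseSpec("__TEXT,__cstring,cstring_literals")
    static bool parseSpec(StringRef Spec) {
      StringRef Segment, Section;
      unsigned TAA = 0, StubSize = 0;
      bool TAAParsed = false;
      std::string Err = MCSectionMachO::ParseSectionSpecifier(
          Spec, Segment, Section, TAA, TAAParsed, StubSize);
      if (!Err.empty()) {
        errs() << "bad mach-o section specifier: " << Err << "\n";
        return false;
      }
      outs() << Segment << "," << Section << " TAA=" << TAA
             << " stubsize=" << StubSize << "\n";
      return true;
    }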
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
new file mode 100644
index 000000000000..6e96b78e315b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -0,0 +1,534 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include <cstdlib>
+using namespace llvm;
+
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true),
+ EmitDebugFrame(false),
+ CurrentW64UnwindInfo(0) {
+ const MCSection *section = NULL;
+ SectionStack.push_back(std::make_pair(section, section));
+}
+
+MCStreamer::~MCStreamer() {
+ for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
+ delete W64UnwindInfos[i];
+}
+
+const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
+ const MCSymbol *A,
+ const MCSymbol *B) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef =
+ MCSymbolRefExpr::Create(A, Variant, Context);
+ const MCExpr *BRef =
+ MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+const MCExpr *MCStreamer::ForceExpAbs(const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding() ||
+ isa<MCSymbolRefExpr>(Expr))
+ return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+raw_ostream &MCStreamer::GetCommentOS() {
+ // By default, discard comments.
+ return nulls();
+}
+
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+ const MCSymbol *Label, int PointerSize) {
+ // emit the sequence to set the address
+ EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+ EmitULEB128IntValue(PointerSize + 1);
+ EmitIntValue(dwarf::DW_LNE_set_address, 1);
+ EmitSymbolValue(Label, PointerSize);
+
+ // emit the sequence for the LineDelta (from 1) and a zero address delta.
+ MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
+
+/// EmitIntValue - Special case of EmitValue that avoids the client having to
+/// pass in an MCExpr for constant integers.
+void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(Size <= 8 && "Invalid size");
+ assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+ "Value does not fit in the given size");
+ char buf[8];
+ const bool isLittleEndian = Context.getAsmInfo().isLittleEndian();
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned index = isLittleEndian ? i : (Size - i - 1);
+ buf[i] = uint8_t(Value >> (index * 8));
+ }
+ EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
+/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ const MCExpr *ABS = ForceExpAbs(Value);
+ EmitValue(ABS, Size, AddrSpace);
+}
+
+
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(Value, Size, AddrSpace);
+}
+
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size,
+ AddrSpace);
+}
+
+void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ const MCExpr *E = MCConstantExpr::Create(FillValue, getContext());
+ for (uint64_t i = 0, e = NumBytes; i != e; ++i)
+ EmitValue(E, 1, AddrSpace);
+}
+
+bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename) {
+ return getContext().GetDwarfFile(Filename, FileNo) == 0;
+}
+
+void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator,
+ StringRef FileName) {
+ getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
+ Discriminator);
+}
+
+MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
+ if (FrameInfos.empty())
+ return NULL;
+ return &FrameInfos.back();
+}
+
+void MCStreamer::EnsureValidFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open frame");
+}
+
+void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+ MCSymbol *EHSymbol) {
+}
+
+void MCStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+
+ StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
+ if (!Symbol->getName().startswith(Prefix))
+ LastNonPrivate = Symbol;
+}
+
+void MCStreamer::EmitCFISections(bool EH, bool Debug) {
+ assert(EH || Debug);
+ EmitEHFrame = EH;
+ EmitDebugFrame = Debug;
+}
+
+void MCStreamer::EmitCFIStartProc() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a frame before finishing the previous one!");
+ MCDwarfFrameInfo Frame;
+ Frame.Begin = getContext().CreateTempSymbol();
+ Frame.Function = LastNonPrivate;
+ EmitLabel(Frame.Begin);
+ FrameInfos.push_back(Frame);
+}
+
+void MCStreamer::EmitCFIEndProc() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+}
+
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(Register, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(MachineLocation::VirtualFP, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(MachineLocation::VirtualFP, Adjustment);
+ MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register);
+ MachineLocation Source(MachineLocation::VirtualFP);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register, Offset);
+ MachineLocation Source(Register, Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register, Offset);
+ MachineLocation Source(Register, Offset);
+ MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Personality = Sym;
+ CurFrame->PersonalityEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Lsda = Sym;
+ CurFrame->LsdaEncoding = Encoding;
+}
+
+void MCStreamer::EmitCFIRememberState() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRestoreState() {
+ // FIXME: Error if there is no matching cfi_remember_state.
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISameValue(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
+ W64UnwindInfos.push_back(Frame);
+ CurrentW64UnwindInfo = W64UnwindInfos.back();
+}
+
+void MCStreamer::EnsureValidW64UnwindInfo() {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open Win64 EH frame function!");
+}
+
+void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame && !CurFrame->End)
+ report_fatal_error("Starting a function before ending the previous one!");
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = Symbol;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndProc() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Not all chained regions terminated!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+}
+
+void MCStreamer::EmitWin64EHStartChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ Frame->Begin = getContext().CreateTempSymbol();
+ Frame->Function = CurFrame->Function;
+ Frame->ChainedParent = CurFrame;
+ EmitLabel(Frame->Begin);
+ setCurrentW64UnwindInfo(Frame);
+}
+
+void MCStreamer::EmitWin64EHEndChained() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (!CurFrame->ChainedParent)
+ report_fatal_error("End of a chained region outside a chained region!");
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+ CurrentW64UnwindInfo = CurFrame->ChainedParent;
+}
+
+void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+ CurFrame->ExceptionHandler = Sym;
+ if (!Except && !Unwind)
+ report_fatal_error("Don't know what kind of handler this is!");
+ if (Unwind)
+ CurFrame->HandlesUnwind = true;
+ if (Except)
+ CurFrame->HandlesExceptions = true;
+}
+
+void MCStreamer::EmitWin64EHHandlerData() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->ChainedParent)
+ report_fatal_error("Chained unwind areas can't have handlers!");
+}
+
+void MCStreamer::EmitWin64EHPushReg(unsigned Register) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushNonVol, Label, Register);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->LastFrameInst >= 0)
+ report_fatal_error("Frame register and offset already specified!");
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned frame pointer offset!");
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ CurFrame->LastFrameInst = CurFrame->Instructions.size();
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHAllocStack(unsigned Size) {
+ EnsureValidW64UnwindInfo();
+ if (Size & 7)
+ report_fatal_error("Misaligned stack allocation!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Label, Size);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 7)
+ report_fatal_error("Misaligned saved register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-8 ? Win64EH::UOP_SaveNonVolBig : Win64EH::UOP_SaveNonVol,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+ EnsureValidW64UnwindInfo();
+ if (Offset & 0x0F)
+ report_fatal_error("Misaligned saved vector register offset!");
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(
+ Offset > 512*1024-16 ? Win64EH::UOP_SaveXMM128Big : Win64EH::UOP_SaveXMM128,
+ Label, Register, Offset);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHPushFrame(bool Code) {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ if (CurFrame->Instructions.size() > 0)
+ report_fatal_error("If present, PushMachFrame must be the first UOP");
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_PushMachFrame, Label, Code);
+ EmitLabel(Label);
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void MCStreamer::EmitWin64EHEndProlog() {
+ EnsureValidW64UnwindInfo();
+ MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ CurFrame->PrologEnd = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->PrologEnd);
+}
+
+void MCStreamer::EmitFnStart() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitFnEnd() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitCantUnwind() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitHandlerData() {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPersonality(const MCSymbol *Personality) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitPad(int64_t Offset) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
+ errs() << "Not implemented yet\n";
+ abort();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void MCStreamer::EmitRawText(StringRef String) {
+ errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
+ " something must not be fully mc'ized\n";
+ abort();
+}
+
+void MCStreamer::EmitRawText(const Twine &T) {
+ SmallString<128> Str;
+ T.toVector(Str);
+ EmitRawText(Str.str());
+}
+
+void MCStreamer::EmitFrames(bool usingCFI) {
+ if (!getNumFrameInfos())
+ return;
+
+ if (EmitEHFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, true);
+
+ if (EmitDebugFrame)
+ MCDwarfFrameEmitter::Emit(*this, usingCFI, false);
+}
+
+void MCStreamer::EmitW64Tables() {
+ if (!getNumW64UnwindInfos())
+ return;
+
+ MCWin64EHUnwindEmitter::Emit(*this);
+}
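The byte-splitting loop in MCStreamer::EmitIntValue above, extracted into a standalone helper for illustration (no MC types involved).

    #include <cstdint>
    #include <vector>

    // Produce the Size bytes of Value in the requested endianness.
    static std::vector<uint8_t> splitInt(uint64_t Value, unsigned Size,
                                         bool IsLittleEndian) {
      std::vector<uint8_t> Bytes(Size);
      for (unsigned I = 0; I != Size; ++I) {
        unsigned Index = IsLittleEndian ? I : (Size - I - 1);
        Bytes[I] = uint8_t(Value >> (Index * 8));
      }
      return Bytes;
    }
    // splitInt(0x0102, 2, /*IsLittleEndian=*/true) yields {0x02, 0x01};
    // with false it yields {0x01, 0x02}.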
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
new file mode 100644
index 000000000000..86dc1083cee9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -0,0 +1,96 @@
+//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+void
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ const SubtargetFeatureKV *PF,
+ const SubtargetFeatureKV *PD,
+ const SubtargetInfoKV *PI,
+ const InstrStage *IS,
+ const unsigned *OC,
+ const unsigned *FP,
+ unsigned NF, unsigned NP) {
+ TargetTriple = TT;
+ ProcFeatures = PF;
+ ProcDesc = PD;
+ ProcItins = PI;
+ Stages = IS;
+ OperandCycles = OC;
+ ForwardingPathes = FP;
+ NumFeatures = NF;
+ NumProcs = NP;
+
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+}
+
+
+/// ReInitMCSubtargetInfo - Change the CPU (optionally supplemented with a
+/// feature string) and recompute the feature bits.
+uint64_t MCSubtargetInfo::ReInitMCSubtargetInfo(StringRef CPU, StringRef FS) {
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version does not change the implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+ FeatureBits ^= FB;
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version will also change all implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+ SubtargetFeatures Features;
+ FeatureBits = Features.ToggleFeature(FeatureBits, FS,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+ assert(ProcItins && "Instruction itineraries information not available!");
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < NumProcs; i++) {
+ assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 &&
+ "Itineraries table is not sorted");
+ }
+#endif
+
+ // Find entry
+ SubtargetInfoKV KV;
+ KV.Key = CPU.data();
+ const SubtargetInfoKV *Found =
+ std::lower_bound(ProcItins, ProcItins+NumProcs, KV);
+ if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return InstrItineraryData();
+ }
+
+ return InstrItineraryData(Stages, OperandCycles, ForwardingPathes,
+ (InstrItinerary *)Found->Value);
+}
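The sorted-table lookup pattern getInstrItineraryForCPU uses above, reduced to a plain struct; KV is a hypothetical stand-in for SubtargetInfoKV.

    #include <algorithm>
    #include <cstring>

    struct KV {
      const char *Key;
      const void *Value;
      bool operator<(const KV &RHS) const {
        return std::strcmp(Key, RHS.Key) < 0;
      }
    };

    // Binary-search a table that is sorted by Key; returns 0 for unknown keys.
    static const void *lookupKey(const KV *Table, unsigned N, const char *Key) {
      KV Probe = { Key, 0 };
      const KV *I = std::lower_bound(Table, Table + N, Probe);
      if (I == Table + N || std::strcmp(I->Key, Key) != 0)
        return 0;
      return I->Value;
    }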
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
new file mode 100644
index 000000000000..c2fad1674aa4
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+ reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '.' && C != '@')
+ return false;
+ return true;
+}
+
+/// NameNeedsQuoting - Return true if the identifier \arg Str needs quotes to be
+/// syntactically correct.
+static bool NameNeedsQuoting(StringRef Str) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i]))
+ return true;
+ return false;
+}
+
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+ const MCSymbol *S = this;
+ while (S->isVariable()) {
+ const MCExpr *Value = S->getVariableValue();
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return *S;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
+ S = &Ref->getSymbol();
+ }
+ return *S;
+}
+
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+ assert(!IsUsed && "Cannot set a variable that has already been used.");
+ assert(Value && "Invalid variable value!");
+ assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
+ "Invalid redefinition!");
+ this->Value = Value;
+
+ // Variables should always be marked as in the same "section" as the value.
+ const MCSection *Section = Value->FindAssociatedSection();
+ if (Section) {
+ setSection(*Section);
+ } else {
+ setUndefined();
+ }
+}
+
+void MCSymbol::print(raw_ostream &OS) const {
+ // The name for this MCSymbol is required to be a valid target name. However,
+ // some targets support quoting names with funny characters. If the name
+ // contains a funny character, then print it quoted.
+ if (!NameNeedsQuoting(getName())) {
+ OS << getName();
+ return;
+ }
+
+ OS << '"' << getName() << '"';
+}
+
+void MCSymbol::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp
new file mode 100644
index 000000000000..c6ea16ce7b4d
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCValue.cpp
@@ -0,0 +1,36 @@
+//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (isAbsolute()) {
+ OS << getConstant();
+ return;
+ }
+
+ getSymA()->print(OS);
+
+ if (getSymB()) {
+ OS << " - ";
+ getSymB()->print(OS);
+ }
+
+ if (getConstant())
+ OS << " + " << getConstant();
+}
+
+void MCValue::dump() const {
+ print(dbgs(), 0);
+}
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
new file mode 100644
index 000000000000..e698384a49f1
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -0,0 +1,258 @@
+//===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+// NOTE: All relocations generated here are 4-byte image-relative.
+
+static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &instArray){
+ uint8_t count = 0;
+ for (std::vector<MCWin64EHInstruction>::const_iterator I = instArray.begin(),
+ E = instArray.end(); I != E; ++I) {
+ switch (I->getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_SetFPReg:
+ case Win64EH::UOP_PushMachFrame:
+ count += 1;
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ count += 2;
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ count += 3;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ if (I->getSize() > 512*1024-8)
+ count += 3;
+ else
+ count += 2;
+ break;
+ }
+ }
+ return count;
+}
+
+static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
+ MCSymbol *rhs) {
+ MCContext &context = streamer.getContext();
+ const MCExpr *diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(
+ lhs, context),
+ MCSymbolRefExpr::Create(
+ rhs, context),
+ context);
+ streamer.EmitAbsValue(diff, 1);
+
+}
+
+static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
+ MCWin64EHInstruction &inst) {
+ uint8_t b1, b2;
+ uint16_t w;
+ b2 = (inst.getOperation() & 0x0F);
+ switch (inst.getOperation()) {
+ case Win64EH::UOP_PushNonVol:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_AllocLarge:
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ if (inst.getSize() > 512*1024-8) {
+ b2 |= 0x10;
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getSize() >> 16;
+ } else {
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getSize() >> 3;
+ }
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_AllocSmall:
+ b2 |= (((inst.getSize()-8) >> 3) & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SetFPReg:
+ b1 = inst.getOffset() & 0xF0;
+ streamer.EmitIntValue(b1, 1);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ case Win64EH::UOP_SaveNonVol:
+ case Win64EH::UOP_SaveXMM128:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ w = inst.getOffset() >> 3;
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128)
+ w >>= 1;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_SaveNonVolBig:
+ case Win64EH::UOP_SaveXMM128Big:
+ b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ if (inst.getOperation() == Win64EH::UOP_SaveXMM128Big)
+ w = inst.getOffset() & 0xFFF0;
+ else
+ w = inst.getOffset() & 0xFFF8;
+ streamer.EmitIntValue(w, 2);
+ w = inst.getOffset() >> 16;
+ streamer.EmitIntValue(w, 2);
+ break;
+ case Win64EH::UOP_PushMachFrame:
+ if (inst.isPushCodeFrame())
+ b2 |= 0x10;
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
+ streamer.EmitIntValue(b2, 1);
+ break;
+ }
+}
+
+static void EmitRuntimeFunction(MCStreamer &streamer,
+ const MCWin64EHUnwindInfo *info) {
+ MCContext &context = streamer.getContext();
+
+ streamer.EmitValueToAlignment(4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Begin, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->End, context), 4);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, context), 4);
+}
+
+static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol) return;
+
+ MCContext &context = streamer.getContext();
+ streamer.EmitValueToAlignment(4);
+ // Low 3 bits are the version number (currently 1); flags occupy the upper 5.
+ uint8_t flags = 0x01;
+ info->Symbol = context.CreateTempSymbol();
+ streamer.EmitLabel(info->Symbol);
+
+ if (info->ChainedParent)
+ flags |= Win64EH::UNW_ChainInfo << 3;
+ else {
+ if (info->HandlesUnwind)
+ flags |= Win64EH::UNW_TerminateHandler << 3;
+ if (info->HandlesExceptions)
+ flags |= Win64EH::UNW_ExceptionHandler << 3;
+ }
+ streamer.EmitIntValue(flags, 1);
+
+ if (info->PrologEnd)
+ EmitAbsDifference(streamer, info->PrologEnd, info->Begin);
+ else
+ streamer.EmitIntValue(0, 1);
+
+ uint8_t numCodes = CountOfUnwindCodes(info->Instructions);
+ streamer.EmitIntValue(numCodes, 1);
+
+ uint8_t frame = 0;
+ if (info->LastFrameInst >= 0) {
+ MCWin64EHInstruction &frameInst = info->Instructions[info->LastFrameInst];
+ assert(frameInst.getOperation() == Win64EH::UOP_SetFPReg);
+ frame = (frameInst.getRegister() & 0x0F) |
+ (frameInst.getOffset() & 0xF0);
+ }
+ streamer.EmitIntValue(frame, 1);
+
+ // Emit unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ MCWin64EHInstruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ EmitUnwindCode(streamer, info->Begin, inst);
+ }
+
+ if (flags & (Win64EH::UNW_ChainInfo << 3))
+ EmitRuntimeFunction(streamer, info->ChainedParent);
+ else if (flags &
+ ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, context),
+ 4);
+ else if (numCodes < 2) {
+ // The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not
+ // a chained unwind info, if there is no handler, and if there are fewer
+ // than 2 slots used in the unwind code array, we have to pad to 8 bytes.
+ if (numCodes == 1)
+ streamer.EmitIntValue(0, 2);
+ else
+ streamer.EmitIntValue(0, 4);
+ }
+}
+
+StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) {
+ if (!func || !func->isInSection()) return "";
+ const MCSection *section = &func->getSection();
+ const MCSectionCOFF *COFFSection;
+ if ((COFFSection = dyn_cast<MCSectionCOFF>(section))) {
+ StringRef name = COFFSection->getSectionName();
+ size_t dollar = name.find('$');
+ size_t dot = name.find('.', 1);
+ if (dollar == StringRef::npos && dot == StringRef::npos)
+ return "";
+ if (dot == StringRef::npos)
+ return name.substr(dollar);
+ if (dollar == StringRef::npos || dot < dollar)
+ return name.substr(dot);
+ return name.substr(dollar);
+ }
+ return "";
+}
+
+void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
+ MCWin64EHUnwindInfo *info) {
+ // Switch sections (the static function above is meant to be called from
+ // here and from Emit()).
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ const MCSection *xdataSect =
+ TAI.getWin64EHTableSection(GetSectionSuffix(info->Function));
+ streamer.SwitchSection(xdataSect);
+
+ llvm::EmitUnwindInfo(streamer, info);
+}
+
+void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ // Emit the unwind info structs first.
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *xdataSect =
+ TAI.getWin64EHTableSection(GetSectionSuffix(info.Function));
+ streamer.SwitchSection(xdataSect);
+ llvm::EmitUnwindInfo(streamer, &info);
+ }
+ // Now emit RUNTIME_FUNCTION entries.
+ for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
+ MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
+ const MCSection *pdataSect =
+ TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
+ streamer.SwitchSection(pdataSect);
+ EmitRuntimeFunction(streamer, &info);
+ }
+}
+
+} // End of namespace llvm
+
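A small sketch of the two bit-packings EmitUnwindInfo performs above; the helper names are invented for the example, only the masks and shifts mirror the code.

    #include <cstdint>

    // First UNWIND_INFO byte: version in the low 3 bits, flags shifted into
    // the upper 5 (matching the 'flags |= ... << 3' logic above).
    static uint8_t packVersionAndFlags(unsigned Version, unsigned Flags) {
      return uint8_t((Version & 0x07) | ((Flags & 0x1F) << 3));
    }

    // Frame byte: frame register in the low nibble, 16-byte-aligned frame
    // offset in the high nibble ('frame = (reg & 0x0F) | (offset & 0xF0)').
    static uint8_t packFrameByte(unsigned Reg, unsigned Offset) {
      return uint8_t((Reg & 0x0F) | (Offset & 0xF0));
    }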
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
new file mode 100644
index 000000000000..69efe231ad6e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -0,0 +1,785 @@
+//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+#include <vector>
+using namespace llvm;
+using namespace llvm::object;
+
+bool MachObjectWriter::
+doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
+ // Undefined symbols are always extern.
+ if (SD->Symbol->isUndefined())
+ return true;
+
+ // References to weak definitions require external relocation entries; the
+ // definition may not always be the one in the same object file.
+ if (SD->getFlags() & SF_WeakDefinition)
+ return true;
+
+ // Otherwise, we can use an internal relocation.
+ return false;
+}
+
+bool MachObjectWriter::
+MachSymbolData::operator<(const MachSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+}
+
+bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+ (MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(Fragment->getParent()) +
+ Layout.getFragmentOffset(Fragment);
+}
+
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Address = Target.getConstant();
+ if (Target.getSymA())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymA()->getSymbol()), Layout);
+ if (Target.getSymB())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymB()->getSymbol()), Layout);
+ return Address;
+ }
+
+ return getSectionAddress(SD->getFragment()->getParent()) +
+ Layout.getSymbolOffset(SD);
+}
+
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+ unsigned Next = SD->getLayoutOrder() + 1;
+ if (Next >= Layout.getSectionOrder().size())
+ return 0;
+
+ const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+ if (NextSD.getSection().isVirtualSection())
+ return 0;
+ return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
+
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+ unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols) {
+ uint32_t Flags = 0;
+
+ if (SubsectionsViaSymbols)
+ Flags |= macho::HF_SubsectionsViaSymbols;
+
+ // struct mach_header (28 bytes) or
+ // struct mach_header_64 (32 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+ Write32(TargetObjectWriter->getCPUType());
+ Write32(TargetObjectWriter->getCPUSubtype());
+
+ Write32(macho::HFT_Object);
+ Write32(NumLoadCommands);
+ Write32(LoadCommandsSize);
+ Write32(Flags);
+ if (is64Bit())
+ Write32(0); // reserved
+
+ assert(OS.tell() - Start ==
+ (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
+
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \arg NumSections - The number of sections in this segment.
+/// \arg SectionDataSize - The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize) {
+ // struct segment_command (56 bytes) or
+ // struct segment_command_64 (72 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ unsigned SegmentLoadCommandSize =
+ is64Bit() ? macho::SegmentLoadCommand64Size:
+ macho::SegmentLoadCommand32Size;
+ Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+ Write32(SegmentLoadCommandSize +
+ NumSections * (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+
+ WriteBytes("", 16);
+ if (is64Bit()) {
+ Write64(0); // vmaddr
+ Write64(VMSize); // vmsize
+ Write64(SectionDataStartOffset); // file offset
+ Write64(SectionDataSize); // file size
+ } else {
+ Write32(0); // vmaddr
+ Write32(VMSize); // vmsize
+ Write32(SectionDataStartOffset); // file offset
+ Write32(SectionDataSize); // file size
+ }
+ Write32(0x7); // maxprot
+ Write32(0x7); // initprot
+ Write32(NumSections);
+ Write32(0); // flags
+
+ assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
+
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionData &SD,
+ uint64_t FileOffset,
+ uint64_t RelocationsStart,
+ unsigned NumRelocations) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+ // The offset is unused for virtual sections.
+ if (SD.getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+ FileOffset = 0;
+ }
+
+ // struct section (68 bytes) or
+ // struct section_64 (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+ WriteBytes(Section.getSectionName(), 16);
+ WriteBytes(Section.getSegmentName(), 16);
+ if (is64Bit()) {
+ Write64(getSectionAddress(&SD)); // address
+ Write64(SectionSize); // size
+ } else {
+ Write32(getSectionAddress(&SD)); // address
+ Write32(SectionSize); // size
+ }
+ Write32(FileOffset);
+
+ unsigned Flags = Section.getTypeAndAttributes();
+ if (SD.hasInstructions())
+ Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+ assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(SD.getAlignment()));
+ Write32(NumRelocations ? RelocationsStart : 0);
+ Write32(NumRelocations);
+ Write32(Flags);
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
+ Write32(Section.getStubSize()); // reserved2
+ if (is64Bit())
+ Write32(0); // reserved3
+
+ assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+}
+
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+ uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize) {
+ // struct symtab_command (24 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Symtab);
+ Write32(macho::SymtabLoadCommandSize);
+ Write32(SymbolOffset);
+ Write32(NumSymbols);
+ Write32(StringTableOffset);
+ Write32(StringTableSize);
+
+ assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols) {
+ // struct dysymtab_command (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Dysymtab);
+ Write32(macho::DysymtabLoadCommandSize);
+ Write32(FirstLocalSymbol);
+ Write32(NumLocalSymbols);
+ Write32(FirstExternalSymbol);
+ Write32(NumExternalSymbols);
+ Write32(FirstUndefinedSymbol);
+ Write32(NumUndefinedSymbols);
+ Write32(0); // tocoff
+ Write32(0); // ntoc
+ Write32(0); // modtaboff
+ Write32(0); // nmodtab
+ Write32(0); // extrefsymoff
+ Write32(0); // nextrefsyms
+ Write32(IndirectSymbolOffset);
+ Write32(NumIndirectSymbols);
+ Write32(0); // extreloff
+ Write32(0); // nextrel
+ Write32(0); // locreloff
+ Write32(0); // nlocrel
+
+ assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
+
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ const MCSymbol &Symbol = Data.getSymbol();
+ uint8_t Type = 0;
+ uint16_t Flags = Data.getFlags();
+ uint32_t Address = 0;
+
+ // Set the N_TYPE bits. See <mach-o/nlist.h>.
+ //
+ // FIXME: Are the prebound or indirect fields possible here?
+ if (Symbol.isUndefined())
+ Type = macho::STT_Undefined;
+ else if (Symbol.isAbsolute())
+ Type = macho::STT_Absolute;
+ else
+ Type = macho::STT_Section;
+
+ // FIXME: Set STAB bits.
+
+ if (Data.isPrivateExtern())
+ Type |= macho::STF_PrivateExtern;
+
+ // Set external bit.
+ if (Data.isExternal() || Symbol.isUndefined())
+ Type |= macho::STF_External;
+
+ // Compute the symbol address.
+ if (Symbol.isDefined()) {
+ if (Symbol.isAbsolute()) {
+ Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
+ } else {
+ Address = getSymbolAddress(&Data, Layout);
+ }
+ } else if (Data.isCommon()) {
+ // Common symbols are encoded with the size in the address
+ // field, and their alignment in the flags.
+ Address = Data.getCommonSize();
+
+ // Common alignment is packed into the 'desc' bits.
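+    // (For example, a 16-byte alignment is stored as Log2Size == 4 in bits
+    // 8..11 of the 'desc' field.)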
+ if (unsigned Align = Data.getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ report_fatal_error("invalid 'common' alignment '" +
+ Twine(Align) + "'");
+ // FIXME: Keep this mask with the SymbolFlags enumeration.
+ Flags = (Flags & 0xF0FF) | (Log2Size << 8);
+ }
+ }
+
+ // struct nlist (12 bytes)
+
+ Write32(MSD.StringIndex);
+ Write8(Type);
+ Write8(MSD.SectionIndex);
+
+ // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+ // value.
+ Write16(Flags);
+ if (is64Bit())
+ Write64(Address);
+ else
+ Write32(Address);
+}
+
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
+
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+ // This is the point where 'as' creates actual symbols for indirect symbols
+ // (in the following two passes). It would be easier for us to do this sooner
+ // when we see the attribute, but that makes getting the order in the symbol
+ // table much more complicated than it is worth.
+ //
+ // FIXME: Revisit this when the dust settles.
+
+  // Bind non-lazy symbol pointers first.
+ unsigned IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
+ Asm.getOrCreateSymbolData(*it->Symbol);
+ }
+
+ // Then lazy symbol pointers and symbol stubs.
+ IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+ continue;
+
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
+ // Set the symbol type to undefined lazy, but only on construction.
+ //
+ // FIXME: Do not hardcode.
+ bool Created;
+ MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+ if (Created)
+ Entry.setFlags(Entry.getFlags() | 0x0001);
+ }
+}
+
+/// ComputeSymbolTable - Compute the symbol table data.
+///
+/// \param StringTable [out] - The string table data.
+/// \param LocalSymbolData [out] - The defined, non-external symbols.
+/// \param ExternalSymbolData [out] - The defined, external (and absolute)
+/// symbols.
+/// \param UndefinedSymbolData [out] - The undefined symbols.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+ assert(Index <= 256 && "Too many sections!");
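+  // (Each nlist entry stores its section index in a single byte, so at most
+  // 255 sections are representable; Index ends at NumSections + 1 here.)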
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Build the symbol arrays and the string table, but only for non-local
+ // symbols.
+ //
+ // The particular order that we collect the symbols and create the string
+ // table, then sort the symbols is chosen to match 'as'. Even though it
+ // doesn't matter for correctness, this is important for letting us diff .o
+ // files.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = 0;
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // External and undefined symbols are required to be in lexicographic order.
+ std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ // The string table is padded to a multiple of 4.
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+}
+
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartAddress = 0;
+ const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+ for (int i = 0, n = Order.size(); i != n ; ++i) {
+ const MCSectionData *SD = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ SectionAddress[SD] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(SD);
+
+    // Explicitly pad the section to match the alignment requirements of the
+    // following one. This is for 'gas' compatibility; it shouldn't strictly
+    // be necessary.
+ StartAddress += getPaddingSize(SD, Layout);
+ }
+}
+
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ computeSectionAddresses(Asm, Layout);
+
+ // Create symbol data for any indirect symbols.
+ BindIndirectSymbols(Asm);
+
+ // Compute symbol table information and bind symbol indices.
+ ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+}
+
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (InSet)
+ return true;
+
+ // The effective address is
+ // addr(atom(A)) + offset(A)
+ // - addr(atom(B)) - offset(B)
+ // and the offsets are not relocatable, so the fixup is fully resolved when
+ // addr(atom(A)) - addr(atom(B)) == 0.
+ const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+ const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSection &SecA = SA.getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+
+ if (IsPCRel) {
+ // The simple (Darwin, except on x86_64) way of dealing with this was to
+ // assume that any reference to a temporary symbol *must* be a temporary
+ // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+ // relocation to a temporary symbol (in the same section) is fully
+ // resolved. This also works in conjunction with absolutized .set, which
+ // requires the compiler to use .set to absolutize the differences between
+ // symbols which the compiler knows to be assembly time constants, so we
+ // don't need to worry about considering symbol differences fully resolved.
+
+ if (!Asm.getBackend().hasReliableSymbolDifference()) {
+ if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ return false;
+ return true;
+ }
+ } else {
+ if (!TargetObjectWriter->useAggressiveSymbolFolding())
+ return false;
+ }
+
+ const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+ A_Base = FA.getAtom();
+ if (!A_Base)
+ return false;
+
+ B_Base = FB.getAtom();
+ if (!B_Base)
+ return false;
+
+ // If the atoms are the same, they are guaranteed to have the same address.
+ if (A_Base == B_Base)
+ return true;
+
+ // Otherwise, we can't prove this is fully resolved.
+ return false;
+}
+
+void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+ unsigned NumSections = Asm.size();
+
+ // The section data starts after the header, the segment load command (and
+ // section headers) and the symbol table.
+ unsigned NumLoadCommands = 1;
+ uint64_t LoadCommandsSize = is64Bit() ?
+ macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+ macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+ // Add the symbol table load command sizes, if used.
+ unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+ UndefinedSymbolData.size();
+ if (NumSymbols) {
+ NumLoadCommands += 2;
+ LoadCommandsSize += (macho::SymtabLoadCommandSize +
+ macho::DysymtabLoadCommandSize);
+ }
+
+ // Compute the total size of the section data, as well as its file size and vm
+ // size.
+ uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+ macho::Header32Size) + LoadCommandsSize;
+ uint64_t SectionDataSize = 0;
+ uint64_t SectionDataFileSize = 0;
+ uint64_t VMSize = 0;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ uint64_t Address = getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionAddressSize(&SD);
+ uint64_t FileSize = Layout.getSectionFileSize(&SD);
+ FileSize += getPaddingSize(&SD, Layout);
+
+ VMSize = std::max(VMSize, Address + Size);
+
+ if (SD.getSection().isVirtualSection())
+ continue;
+
+ SectionDataSize = std::max(SectionDataSize, Address + Size);
+ SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
+ }
+
+ // The section data is padded to 4 bytes.
+ //
+ // FIXME: Is this machine dependent?
+ unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ SectionDataFileSize += SectionDataPadding;
+
+ // Write the prolog, starting with the header and load command...
+ WriteHeader(NumLoadCommands, LoadCommandsSize,
+ Asm.getSubsectionsViaSymbols());
+ WriteSegmentLoadCommand(NumSections, VMSize,
+ SectionDataStart, SectionDataSize);
+
+ // ... and then the section headers.
+ uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ unsigned NumRelocs = Relocs.size();
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+ WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+ RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+ }
+
+ // Write the symbol table load command, if used.
+ if (NumSymbols) {
+ unsigned FirstLocalSymbol = 0;
+ unsigned NumLocalSymbols = LocalSymbolData.size();
+ unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+ unsigned NumExternalSymbols = ExternalSymbolData.size();
+ unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+ unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+ unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+ unsigned NumSymTabSymbols =
+ NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+ uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+ uint64_t IndirectSymbolOffset = 0;
+
+ // If used, the indirect symbols are written after the section data.
+ if (NumIndirectSymbols)
+ IndirectSymbolOffset = RelocTableEnd;
+
+ // The symbol table is written after the indirect symbol data.
+ uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+ // The string table is written after symbol table.
+ uint64_t StringTableOffset =
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+ macho::Nlist32Size);
+ WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ StringTableOffset, StringTable.size());
+
+ WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ FirstExternalSymbol, NumExternalSymbols,
+ FirstUndefinedSymbol, NumUndefinedSymbols,
+ IndirectSymbolOffset, NumIndirectSymbols);
+ }
+
+ // Write the actual section data.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ Asm.WriteSectionData(it, Layout);
+
+ uint64_t Pad = getPaddingSize(it, Layout);
+ for (unsigned int i = 0; i < Pad; ++i)
+ Write8(0);
+ }
+
+ // Write the extra padding.
+ WriteZeros(SectionDataPadding);
+
+ // Write the relocation entries.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Write the section relocation entries, in reverse order to match 'as'
+ // (approximately, the exact algorithm is more complicated than this).
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ Write32(Relocs[e - i - 1].Word0);
+ Write32(Relocs[e - i - 1].Word1);
+ }
+ }
+
+ // Write the symbol table data, if used.
+ if (NumSymbols) {
+ // Write the indirect symbol entries.
+ for (MCAssembler::const_indirect_symbol_iterator
+ it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // Indirect symbols in the non-lazy symbol pointer section have some
+ // special handling.
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+ // If this symbol is defined and internal, mark it as such.
+ if (it->Symbol->isDefined() &&
+ !Asm.getSymbolData(*it->Symbol).isExternal()) {
+ uint32_t Flags = macho::ISF_Local;
+ if (it->Symbol->isAbsolute())
+ Flags |= macho::ISF_Absolute;
+ Write32(Flags);
+ continue;
+ }
+ }
+
+ Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+ }
+
+ // FIXME: Check that offsets match computed ones.
+
+ // Write the symbol table entries.
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ WriteNlist(LocalSymbolData[i], Layout);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ WriteNlist(ExternalSymbolData[i], Layout);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ WriteNlist(UndefinedSymbolData[i], Layout);
+
+ // Write the string table.
+ OS << StringTable.str();
+ }
+}
+
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return new MachObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
new file mode 100644
index 000000000000..348cd4c9ab1b
--- /dev/null
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -0,0 +1,397 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature has a flag; '+' or '-'
+///
+static inline bool hasFlag(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const StringRef Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if enable flag; '+'.
+///
+static inline bool isEnabled(const StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
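+/// For example, PrependFlag("neon", true) returns "+neon"; a string that
+/// already carries a flag is returned unchanged.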
+static inline std::string PrependFlag(const StringRef Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature))
+ return Feature;
+ std::string Prefix = IsEnabled ? "+" : "-";
+ Prefix += Feature;
+ return Prefix;
+}
+
+/// Split - Splits a string of comma-separated items into a vector of strings.
+///
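+/// For example, "+neon,+vfp3" is split into {"+neon", "+vfp3"}.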
+static void Split(std::vector<std::string> &V, const StringRef S) {
+ if (S.empty())
+ return;
+
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+ // If no comma found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
+
+/// Join a vector of strings into a single string, with a comma separating
+/// each element.
+///
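+/// For example, {"+neon", "+vfp3"} becomes "+neon,+vfp3".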
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the first feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
+
+/// AddFeature - Add a feature to the list, lowercased and prefixed with a
+/// '+' or '-' flag.
+void SubtargetFeatures::AddFeature(const StringRef String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
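+/// The array must be sorted by Key for the binary search to be valid.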
+template<typename T> const T *Find(const StringRef S, const T *A, size_t L) {
+ // Make the lower bound element we're looking for
+ T KV;
+ KV.Key = S.data();
+ // Determine the end of the array
+ const T *Hi = A + L;
+ // Binary search the array
+ const T *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || StringRef(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
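+/// Reached via a CPU name of "help" or a feature of "+help"; prints both
+/// tables and exits.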
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ errs() << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ errs() << " " << CPUTable[i].Key
+ << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
+ << " - " << CPUTable[i].Desc << ".\n";
+ errs() << "\n";
+
+ // Print the Feature table.
+ errs() << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ errs() << " " << FeatTable[i].Key
+ << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
+ << " - " << FeatTable[i].Desc << ".\n";
+ errs() << "\n";
+
+ errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
+ << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ std::exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
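+/// For example, if A implies B and B implies C, setting A also sets B and C.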
+static
+void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
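+/// For example, if A implies B, clearing B also clears A, since A cannot
+/// remain enabled without B.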
+static
+void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ToggleFeature - Toggle a feature and return the newly updated feature
+/// bits.
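+/// Toggling a set feature clears it along with every feature that implies it;
+/// toggling a clear feature sets it along with everything it implies.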
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+
+ return Bits;
+}
+
+
+/// getFeatureBits - Get feature bits of a CPU.
+///
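+/// The CPU name (if given) supplies the base feature bits; the '+'/'-'
+/// entries in the Features list are then applied on top of them, in order.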
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+ const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ if (!FeatureTableSize || !CPUTableSize)
+ return 0;
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint64_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (CPU == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry if CPU name is specified.
+ if (!CPU.empty()) {
+ const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ }
+ }
+
+ // Iterate through each feature
+ for (size_t i = 0, E = Features.size(); i < E; i++) {
+ const StringRef Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+ }
+
+ return Bits;
+}
+
+/// Get scheduling itinerary of a CPU.
+void *SubtargetFeatures::getItinerary(const StringRef CPU,
+ const SubtargetInfoKV *Table,
+ size_t TableSize) {
+ assert(Table && "missing table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < TableSize; i++) {
+ assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted");
+ }
+#endif
+
+ // Find entry
+ const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize);
+
+ if (Entry) {
+ return Entry->Value;
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return NULL;
+ }
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(raw_ostream &OS) const {
+ for (size_t i = 0, e = Features.size(); i != e; ++i)
+ OS << Features[i] << " ";
+ OS << "\n";
+}
+
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(dbgs());
+}
+
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
+///
+/// FIXME: This is an inelegant way of specifying the features of a
+/// subtarget. It would be better if we could encode this information
+/// into the IR. See <rdar://5972456>.
+///
+void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
+ if (Triple.getVendor() == Triple::Apple) {
+ if (Triple.getArch() == Triple::ppc) {
+ // powerpc-apple-*
+ AddFeature("altivec");
+ } else if (Triple.getArch() == Triple::ppc64) {
+ // powerpc64-apple-*
+ AddFeature("64bit");
+ AddFeature("altivec");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
new file mode 100644
index 000000000000..192755742535
--- /dev/null
+++ b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
@@ -0,0 +1,37 @@
+//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+TargetAsmBackend::TargetAsmBackend()
+ : HasReliableSymbolDifference(false)
+{
+}
+
+TargetAsmBackend::~TargetAsmBackend() {
+}
+
+const MCFixupKindInfo &
+TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ static const MCFixupKindInfo Builtins[] = {
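+    // Each entry is { Name, TargetOffset (bits), TargetSize (bits), Flags }.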
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+  assert((size_t)Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
+         "Unknown fixup kind");
+ return Builtins[Kind];
+}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..101237aabb02
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -0,0 +1,887 @@
+//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCSectionCOFF.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/Support/TimeValue.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <cstdio>
+#include <cstring>
+
+using namespace llvm;
+
+namespace {
+typedef llvm::SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+ ATFunctionDefinition,
+ ATbfAndefSymbol,
+ ATWeakExternal,
+ ATFile,
+ ATSectionDefinition
+};
+
+struct AuxSymbol {
+ AuxiliaryType AuxType;
+ COFF::Auxiliary Aux;
+};
+
+class COFFSymbol;
+class COFFSection;
+
+class COFFSymbol {
+public:
+ COFF::symbol Data;
+
+ typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+ name Name;
+ int Index;
+ AuxiliarySymbols Aux;
+ COFFSymbol *Other;
+ COFFSection *Section;
+ int Relocations;
+
+ MCSymbolData const *MCData;
+
+ COFFSymbol(llvm::StringRef name);
+ size_t size() const;
+ void set_name_offset(uint32_t Offset);
+
+ bool should_keep() const;
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+ COFF::relocation Data;
+ COFFSymbol *Symb;
+
+ COFFRelocation() : Symb(NULL) {}
+ static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+ COFF::section Header;
+
+ std::string Name;
+ int Number;
+ MCSectionData const *MCData;
+ COFFSymbol *Symbol;
+ relocations Relocations;
+
+ COFFSection(llvm::StringRef name);
+ static size_t size();
+};
+
+// This class holds the COFF string table.
+class StringTable {
+ typedef llvm::StringMap<size_t> map;
+ map Map;
+
+ void update_length();
+public:
+ std::vector<char> Data;
+
+ StringTable();
+ size_t size() const;
+ size_t insert(llvm::StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+ typedef std::vector<COFFSymbol*> symbols;
+ typedef std::vector<COFFSection*> sections;
+
+ typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSection const *, COFFSection *> section_map;
+
+ // Root level file contents.
+ bool Is64Bit;
+ COFF::header Header;
+ sections Sections;
+ symbols Symbols;
+ StringTable Strings;
+
+ // Maps used during object file creation.
+ section_map SectionMap;
+ symbol_map SymbolMap;
+
+ WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ ~WinCOFFObjectWriter();
+
+ COFFSymbol *createSymbol(StringRef Name);
+ COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+ COFFSection *createSection(StringRef Name);
+
+ template <typename object_t, typename list_t>
+ object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+
+ void DefineSection(MCSectionData const &SectionData);
+ void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+
+ void MakeSymbolReal(COFFSymbol &S, size_t Index);
+ void MakeSectionReal(COFFSection &S, size_t Number);
+
+ bool ExportSection(COFFSection const *S);
+ bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+ bool IsPhysicalSection(COFFSection *S);
+
+ // Entity writing methods.
+
+ void WriteFileHeader(const COFF::header &Header);
+ void WriteSymbol(const COFFSymbol *S);
+ void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+ void WriteSectionHeader(const COFF::section &S);
+ void WriteRelocation(const COFF::relocation &R);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x000000FF) >> 0;
+ Ptr[1] = (Value & 0x0000FF00) >> 8;
+ Ptr[2] = (Value & 0x00FF0000) >> 16;
+ Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x00FF) >> 0;
+ Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0xFF) >> 0;
+}
+
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(llvm::StringRef name)
+ : Name(name.begin(), name.end())
+ , Other(NULL)
+ , Section(NULL)
+ , Relocations(0)
+ , MCData(NULL) {
+ memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+ return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+ write_uint32_le(Data.Name + 0, 0);
+ write_uint32_le(Data.Name + 4, Offset);
+}
+
+/// logic to decide if the symbol should be reported in the symbol table
+bool COFFSymbol::should_keep() const {
+  // no section means it's external, keep it
+ if (Section == NULL)
+ return true;
+
+ // if it has relocations pointing at it, keep it
+ if (Relocations > 0) {
+ assert(Section->Number != -1 && "Sections with relocations must be real!");
+ return true;
+ }
+
+  // if the section it's in is being dropped, drop it
+ if (Section->Number == -1)
+ return false;
+
+ // if it is the section symbol, keep it
+ if (Section->Symbol == this)
+ return true;
+
+  // if it's temporary, drop it
+ if (MCData && MCData->getSymbol().isTemporary())
+ return false;
+
+ // otherwise, keep it
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(llvm::StringRef name)
+ : Name(name)
+ , MCData(NULL)
+ , Symbol(NULL) {
+ memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+ return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes uint32 length header.
+void StringTable::update_length() {
+ write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+ // The string table data begins with the length of the entire string table
+ // including the length header. Allocate space for this header.
+ Data.resize(4);
+}
+
+size_t StringTable::size() const {
+ return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the index into the string table where the string is now located.
+size_t StringTable::insert(llvm::StringRef String) {
+ map::iterator i = Map.find(String);
+
+ if (i != Map.end())
+ return i->second;
+
+ size_t Offset = Data.size();
+
+ // Insert string data into string table.
+ Data.insert(Data.end(), String.begin(), String.end());
+ Data.push_back('\0');
+
+ // Put a reference to it in the map.
+ Map[String] = Offset;
+
+ // Update the internal length field.
+ update_length();
+
+ return Offset;
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+ : MCObjectWriter(OS, true)
+ , Is64Bit(is64Bit) {
+ memset(&Header, 0, sizeof(Header));
+
+  Header.Machine = Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
+                           : COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+ for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+ delete *I;
+ for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+ delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
+ return createCOFFEntity<COFFSymbol>(Name, Symbols);
+}
+
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){
+ symbol_map::iterator i = SymbolMap.find(Symbol);
+ if (i != SymbolMap.end())
+ return i->second;
+ COFFSymbol *RetSymbol
+ = createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
+ SymbolMap[Symbol] = RetSymbol;
+ return RetSymbol;
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
+}
+
+/// A template used to create a new symbol or section object and add it to the
+/// given list.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+ list_t &List) {
+ object_t *Object = new object_t(Name);
+
+ List.push_back(Object);
+
+ return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+ && "Got non COFF section in the COFF backend!");
+ // FIXME: Not sure how to verify this (at least in a debug build).
+ MCSectionCOFF const &Sec =
+ static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+ COFFSection *coff_section = createSection(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+ coff_section->Symbol = coff_symbol;
+ coff_symbol->Section = coff_section;
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+
+ // In this case the auxiliary symbol is a Section Definition.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+ coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+ uint32_t &Characteristics = coff_section->Header.Characteristics;
+ switch (SectionData.getAlignment()) {
+ case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
+ case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
+ case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
+ case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
+ case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
+ case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
+ case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
+ case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
+ case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
+ case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
+ case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+ case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+ case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+ case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ default:
+ llvm_unreachable("unsupported section alignment");
+ }
+
+ // Bind internal COFF section to MC section.
+ coff_section->MCData = &SectionData;
+ SectionMap[&SectionData.getSection()] = coff_section;
+}
+
+/// This function takes a symbol data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
+
+ coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+ if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ if (SymbolData.getSymbol().isVariable()) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+ const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+ // FIXME: This assert message isn't very good.
+ assert(Value->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ const MCSymbolRefExpr *SymbolRef =
+ static_cast<const MCSymbolRefExpr *>(Value);
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+ } else {
+ std::string WeakName = std::string(".weak.")
+ + SymbolData.getSymbol().getName().str()
+ + ".default";
+ COFFSymbol *WeakDefault = createSymbol(WeakName);
+ WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ WeakDefault->Data.Type = 0;
+ WeakDefault->Data.Value = 0;
+ coff_symbol->Other = WeakDefault;
+ }
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (SymbolData.Fragment != NULL)
+ coff_symbol->Section =
+ SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+}
+
+/// Making a section real involves assigning it a number and putting its
+/// name into the string table if needed.
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ // FIXME: Why is this number 999999? This number is never mentioned in the
+ // spec. I'm assuming this is due to the printed value needing to fit into
+ // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+ // 6)? The spec does not state if this entry should be null terminated in
+ // this case, and thus this seems to be the best way to do it. I think I
+ // just solved my own FIXME...
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+ std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+ } else
+ std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+
+ S.Number = Number;
+ S.Symbol->Data.SectionNumber = S.Number;
+ S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
+
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ S.set_name_offset(StringTableEntry);
+ } else
+ std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+ S.Index = Index;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+ return !S->MCData->getFragmentList().empty();
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Asm) {
+ // This doesn't seem to be right. Strings referred to from the .data section
+ // need symbols so they can be linked to code in the .text section right?
+
+ // return Asm.isSymbolLinkerVisible (&SymbolData);
+
+ // For now, all non-variable symbols are exported,
+ // the linker will sort the rest out for us.
+ return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+ WriteLE16(Header.Machine);
+ WriteLE16(Header.NumberOfSections);
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+ WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+ WriteLE32(S->Data.Value);
+ WriteLE16(S->Data.SectionNumber);
+ WriteLE16(S->Data.Type);
+ Write8(S->Data.StorageClass);
+ Write8(S->Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch(i->AuxType) {
+ case ATFunctionDefinition:
+ WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+ WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ break;
+ case ATbfAndefSymbol:
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+ WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+ WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ break;
+ case ATWeakExternal:
+ WriteLE32(i->Aux.WeakExternal.TagIndex);
+ WriteLE32(i->Aux.WeakExternal.Characteristics);
+ WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ break;
+ case ATFile:
+ WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+ sizeof(i->Aux.File.FileName)));
+ break;
+ case ATSectionDefinition:
+ WriteLE32(i->Aux.SectionDefinition.Length);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ WriteLE32(i->Aux.SectionDefinition.CheckSum);
+ WriteLE16(i->Aux.SectionDefinition.Number);
+ Write8(i->Aux.SectionDefinition.Selection);
+ WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ break;
+ }
+ }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+ WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+ WriteLE32(S.VirtualSize);
+ WriteLE32(S.VirtualAddress);
+ WriteLE32(S.SizeOfRawData);
+ WriteLE32(S.PointerToRawData);
+ WriteLE32(S.PointerToRelocations);
+ WriteLE32(S.PointerToLineNumbers);
+ WriteLE16(S.NumberOfRelocations);
+ WriteLE16(S.NumberOfLineNumbers);
+ WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+ WriteLE32(R.VirtualAddress);
+ WriteLE32(R.SymbolTableIndex);
+ WriteLE16(R.Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area.
+
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+ DefineSection(*i);
+
+ for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end(); i != e; i++) {
+ if (ExportSymbol(*i, Asm))
+ DefineSymbol(*i, Asm);
+ }
+}
+
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+ MCSectionData const *SectionData = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
+ "Section must already have been defined in ExecutePostLayoutBinding!");
+ assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
+ "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+ COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+ const MCSymbolRefExpr *SymA = Target.getSymA();
+ const MCSymbolRefExpr *SymB = Target.getSymB();
+ const bool CrossSection = SymB &&
+ &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection();
+
+ if (Target.getSymB()) {
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&B_SD);
+
+    // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ FixedValue = b - a;
+    // In the case where we have SymA and SymB, we just need to store the delta
+ // between the two symbols. Update FixedValue to account for the delta, and
+ // skip recording the relocation.
+ if (!CrossSection)
+ return;
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+
+ // Turn relocations for temporary symbols into section relocations.
+ if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
+ Reloc.Symb = coff_symbol->Section->Symbol;
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+ + coff_symbol->MCData->getOffset();
+ } else
+ Reloc.Symb = coff_symbol;
+
+ ++Reloc.Symb->Relocations;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+ unsigned FixupKind = Fixup.getKind();
+
+ if (CrossSection)
+ FixupKind = FK_PCRel_4;
+
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
+ : COFF::IMAGE_REL_I386_REL32;
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ FixedValue += 4;
+ break;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
+ : COFF::IMAGE_REL_I386_DIR32;
+ break;
+ case FK_Data_8:
+ if (Is64Bit)
+ Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
+ else
+ llvm_unreachable("unsupported relocation type");
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+
+ coff_section->Relocations.push_back(Reloc);
+}
+
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // Assign symbol and section indexes and offsets.
+ Header.NumberOfSections = 0;
+
+ for (sections::iterator i = Sections.begin(),
+ e = Sections.end(); i != e; i++) {
+ if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+ MakeSectionReal(**i, ++Header.NumberOfSections);
+ } else {
+ (*i)->Number = -1;
+ }
+ }
+
+ Header.NumberOfSymbols = 0;
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+ // Update section number & offset for symbols that have them.
+ if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+ assert(coff_symbol->Section != NULL);
+
+ coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
+ coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ + SymbolData->Offset;
+ }
+
+ if (coff_symbol->should_keep()) {
+ MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ } else
+ coff_symbol->Index = -1;
+ }
+
+ // Fixup weak external references.
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ if (coff_symbol->Other != NULL) {
+ assert(coff_symbol->Index != -1);
+ assert(coff_symbol->Aux.size() == 1 &&
+ "Symbol must contain one aux symbol!");
+ assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
+ "Symbol's aux symbol must be a Weak External!");
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
+ }
+ }
+
+ // Assign file offsets to COFF object file structures.
+
+ unsigned offset = 0;
+
+ offset += COFF::HeaderSize;
+ offset += COFF::SectionSize * Header.NumberOfSections;
+
+ for (MCAssembler::const_iterator i = Asm.begin(),
+ e = Asm.end();
+ i != e; i++) {
+ COFFSection *Sec = SectionMap[&i->getSection()];
+
+ if (Sec->Number == -1)
+ continue;
+
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+ if (IsPhysicalSection(Sec)) {
+ Sec->Header.PointerToRawData = offset;
+
+ offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (Sec->Relocations.size() > 0) {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ Sec->Header.PointerToRelocations = offset;
+
+ offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (relocations::iterator cr = Sec->Relocations.begin(),
+ er = Sec->Relocations.end();
+ cr != er; ++cr) {
+ assert((*cr).Symb->Index != -1);
+ (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ }
+ }
+
+ assert(Sec->Symbol->Aux.size() == 1
+ && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symbol->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = offset;
+
+ Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+ // Write it all to disk...
+ WriteFileHeader(Header);
+
+ {
+ sections::iterator i, ie;
+ MCAssembler::const_iterator j, je;
+
+ for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+ if ((*i)->Number != -1)
+ WriteSectionHeader((*i)->Header);
+
+ for (i = Sections.begin(), ie = Sections.end(),
+ j = Asm.begin(), je = Asm.end();
+ (i != ie) && (j != je); ++i, ++j) {
+
+ if ((*i)->Number == -1)
+ continue;
+
+ if ((*i)->Header.PointerToRawData != 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ Asm.WriteSectionData(j, Layout);
+ }
+
+ if ((*i)->Relocations.size() > 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ for (relocations::const_iterator k = (*i)->Relocations.begin(),
+ ke = (*i)->Relocations.end();
+ k != ke; k++) {
+ WriteRelocation(k->Data);
+ }
+ } else
+ assert((*i)->Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ }
+ }
+
+ assert(OS.tell() == Header.PointerToSymbolTable &&
+ "Header::PointerToSymbolTable is insane!");
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+ if ((*i)->Index != -1)
+ WriteSymbol(*i);
+
+ OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter factory function
+
+namespace llvm {
+ MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
+ return new WinCOFFObjectWriter(OS, is64Bit);
+ }
+}
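
A minimal usage sketch of the factory above (illustrative only, not part of this commit): the only knob it exposes is 32- vs 64-bit COFF, so a caller that already knows its target triple could wrap it as below. makeCOFFWriter is a hypothetical helper name.

#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"

// Matches the definition above; redeclared here only to keep the sketch
// self-contained.
namespace llvm {
  MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
}

// Hypothetical helper: pick the 32- or 64-bit COFF writer from a triple.
// The returned writer is owned by the caller (typically an MCAssembler).
static llvm::MCObjectWriter *makeCOFFWriter(llvm::raw_ostream &OS,
                                            const llvm::Triple &T) {
  return llvm::createWinCOFFObjectWriter(OS, T.getArch() == llvm::Triple::x86_64);
}
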
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..6c36c1231c83
--- /dev/null
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -0,0 +1,406 @@
+//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file streamer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFStreamer"
+
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringMap.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class WinCOFFStreamer : public MCObjectStreamer {
+public:
+ MCSymbol const *CurSymbol;
+
+ WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS);
+
+ void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External);
+
+ // MCStreamer interface
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize, unsigned MaxBytesToEmit);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit);
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitInstruction(const MCInst &Instruction);
+ virtual void EmitWin64EHHandlerData();
+ virtual void Finish();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+ virtual void EmitInstToData(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+
+ void SetSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+ }
+
+ void SetSectionText() {
+ SetSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionData() {
+ SetSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionBSS() {
+ SetSection(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+
+};
+} // end anonymous namespace.
+
+WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS)
+ : MCObjectStreamer(Context, TAB, OS, &CE)
+ , CurSymbol(NULL) {
+}
+
+void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+ std::string SectionName(".bss$linkonce");
+ SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
+
+ MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ unsigned Characteristics =
+ COFF::IMAGE_SCN_LNK_COMDAT |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
+
+ const MCSection *Section = MCStreamer::getContext().getCOFFSection(
+ SectionName, Characteristics, Selection, SectionKind::getBSS());
+
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ SymbolData.setExternal(External);
+
+ Symbol->setSection(*Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+
+ SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
+}
+
+// MCStreamer interface
+
+void WinCOFFStreamer::InitSections() {
+ SetSectionText();
+ SetSectionData();
+ SetSectionBSS();
+ SetSectionText();
+}
+
+void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCObjectStreamer::EmitLabel(Symbol);
+}
+
+void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ // FIXME: This is all very ugly and depressing. What needs to happen here
+ // depends on quite a few things that are all part of relaxation, which we
+ // don't really even do.
+
+ if (Value->getKind() != MCExpr::SymbolRef) {
+ // TODO: This is exactly the same as MachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ AddValueSymbols(Value);
+ Symbol->setVariableValue(Value);
+ } else {
+ // FIXME: This is a horrible way to do this :(. This should really be
+ // handled after we are done with the MC* objects and immediately before
+ // writing out the object file when we know exactly what the symbol should
+ // look like in the coff symbol table. I'm not doing that now because the
+ // COFF object writer doesn't have a clearly defined separation between MC
+ // data structures, the object writers data structures, and the raw, POD,
+ // data structures that get written to disk.
+
+ // Copy over the aliased data.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
+ dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
+
+ // FIXME: This is particularly nasty because it breaks as soon as any data
+ // members of MCSymbolData change.
+ SD.CommonAlign = RealSD.CommonAlign;
+ SD.CommonSize = RealSD.CommonSize;
+ SD.Flags = RealSD.Flags;
+ SD.Fragment = RealSD.Fragment;
+ SD.Index = RealSD.Index;
+ SD.IsExternal = RealSD.IsExternal;
+ SD.IsPrivateExtern = RealSD.IsPrivateExtern;
+ SD.Offset = RealSD.Offset;
+ SD.SymbolSize = RealSD.SymbolSize;
+ }
+}
+
+void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ assert(Symbol && "Symbol must be non-null!");
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ switch (Attribute) {
+ case MCSA_WeakReference:
+ case MCSA_Weak: {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+ SD.setExternal(true);
+ }
+ break;
+
+ case MCSA_Global:
+ getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ break;
+
+ default:
+ llvm_unreachable("unsupported attribute");
+ break;
+ }
+}
+
+void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+ "to BeginCOFFSymbolDef!");
+ CurSymbol = Symbol;
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+ "the first byte!");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ StorageClass << COFF::SF_ClassShift,
+ COFF::SF_ClassMask);
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+ "bytes");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ Type << COFF::SF_TypeShift,
+ COFF::SF_TypeMask);
+}
+
+void WinCOFFStreamer::EndCOFFSymbolDef() {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ CurSymbol = NULL;
+}
+
+void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+}
+
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ AddCommonSymbol(Symbol, Size, 1, false);
+}
+
+void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+ // Ignore for now; linkers don't care, and proper debug
+ // info will be a much larger effort.
+}
+
+void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
+ for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
+ if (Instruction.getOperand(i).isExpr())
+ AddValueSymbols(Instruction.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ MCInstFragment *Fragment =
+ new MCInstFragment(Instruction, getCurrentSectionData());
+
+ raw_svector_ostream VecOS(Fragment->getCode());
+
+ getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
+ Fragment->getFixups());
+}
+
+void WinCOFFStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+}
+
+void WinCOFFStreamer::Finish() {
+ EmitW64Tables();
+ MCObjectStreamer::Finish();
+}
+
+namespace llvm
+{
+ MCStreamer *createWinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS,
+ bool RelaxAll) {
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
+ }
+}
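
A hedged sketch of how the streamer factory above might be wired up (not part of the commit): the context, asm backend, and code emitter are assumed to come from the usual target-registry plumbing, which this file does not show, so they are taken as parameters here.

// Illustrative only: forwards prerequisites the caller already owns to the
// factory defined above; RelaxAll is passed through to the MCAssembler.
// Assumes the createWinCOFFStreamer declaration above is visible.
static llvm::MCStreamer *makeCOFFStreamer(llvm::MCContext &Ctx,
                                          llvm::TargetAsmBackend &TAB,
                                          llvm::MCCodeEmitter &CE,
                                          llvm::raw_ostream &OS,
                                          bool RelaxAll) {
  return llvm::createWinCOFFStreamer(Ctx, TAB, CE, OS, RelaxAll);
}
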
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
new file mode 100644
index 000000000000..4b31c7557dd3
--- /dev/null
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -0,0 +1,96 @@
+//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+// Include headers for createBinary.
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
+
+using namespace llvm;
+using namespace object;
+
+Binary::~Binary() {
+ delete Data;
+}
+
+Binary::Binary(unsigned int Type, MemoryBuffer *Source)
+ : TypeID(Type)
+ , Data(Source) {}
+
+StringRef Binary::getData() const {
+ return Data->getBuffer();
+}
+
+StringRef Binary::getFileName() const {
+ return Data->getBufferIdentifier();
+}
+
+error_code object::createBinary(MemoryBuffer *Source,
+ OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> scopedSource(Source);
+ if (!Source)
+ return make_error_code(errc::invalid_argument);
+ if (Source->getBufferSize() < 64)
+ return object_error::invalid_file_type;
+ sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(),
+ static_cast<unsigned>(Source->getBufferSize()));
+ error_code ec;
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createELFObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createMachOObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::COFF_FileType: {
+ OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ default: // Unrecognized object file format.
+ return object_error::invalid_file_type;
+ }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(Path, File))
+ return ec;
+ return createBinary(File.take(), Result);
+}
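
A small usage sketch for the createBinary entry points above (hypothetical caller, not part of the commit): open a file by path and report the identifier the Binary keeps from its MemoryBuffer, forwarding any error_code unchanged.

#include "llvm/Object/Binary.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical caller; printBinaryName is not an LLVM API.
static llvm::error_code printBinaryName(llvm::StringRef Path,
                                        llvm::raw_ostream &OS) {
  llvm::OwningPtr<llvm::object::Binary> Bin;
  if (llvm::error_code ec = llvm::object::createBinary(Path, Bin))
    return ec;                        // unreadable or unrecognized file
  OS << Bin->getFileName() << "\n";   // buffer identifier, see getFileName()
  return llvm::object::object_error::success;
}
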
diff --git a/contrib/llvm/lib/Object/CMakeLists.txt b/contrib/llvm/lib/Object/CMakeLists.txt
new file mode 100644
index 000000000000..6a6814fd37d9
--- /dev/null
+++ b/contrib/llvm/lib/Object/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMObject
+ MachOObject.cpp
+ ObjectFile.cpp
+ COFFObjectFile.cpp
+ ELFObjectFile.cpp
+ )
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 000000000000..07de6bc99973
--- /dev/null
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,455 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFF.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+// Returns false and sets ec if size is greater than the buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+ if (m->getBufferSize() < size) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+
+// Returns false if any bytes in [addr, addr + size) fall outside of m.
+bool checkAddr(const MemoryBuffer *m,
+ error_code &ec,
+ uintptr_t addr,
+ uint64_t size) {
+ if (addr + size < addr ||
+ addr + size < size ||
+ addr + size > uintptr_t(m->getBufferEnd())) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
+}
+
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+ const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
+
+# ifndef NDEBUG
+ // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
+ if (offset < Header->PointerToSymbolTable
+ || offset >= Header->PointerToSymbolTable
+ + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+ report_fatal_error("Symbol was outside of symbol table.");
+
+ assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+ == 0 && "Symbol did not point to the beginning of a symbol");
+# endif
+
+ return addr;
+}
+
+const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
+ const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p);
+
+# ifndef NDEBUG
+ // Verify that the section points to a valid entry in the section table.
+ if (addr < SectionTable
+ || addr >= (SectionTable + Header->NumberOfSections))
+ report_fatal_error("Section was outside of section table.");
+
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable);
+ assert(offset % sizeof(coff_section) == 0 &&
+ "Section did not point to the beginning of a section");
+# endif
+
+ return addr;
+}
+
+error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ symb += 1 + symb->NumberOfAuxSymbols;
+ Symb.p = reinterpret_cast<uintptr_t>(symb);
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ // Check for string table entry. First 4 bytes are 0.
+ if (symb->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symb->Name.Offset.Offset;
+ if (error_code ec = getString(Offset, Result))
+ return ec;
+ return object_error::success;
+ }
+
+ if (symb->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Result = StringRef(symb->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Result = StringRef(symb->Name.ShortName, 8);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->VirtualAddress + symb->Value;
+ else
+ Result = symb->Value;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ // FIXME: Return the correct size. This requires looking at all the symbols
+ // in the same section as this symbol, and looking for either the next
+ // symbol, or the end of the section.
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->SizeOfRawData - symb->Value;
+ else
+ Result = 0;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ char ret = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".sxdata", 'N')
+ .Default('?');
+
+ if (ret != '?') {
+ Result = ret;
+ return object_error::success;
+ }
+
+ uint32_t Characteristics = 0;
+ if (symb->SectionNumber > 0) {
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ Characteristics = Section->Characteristics;
+ }
+
+ switch (symb->SectionNumber) {
+ case COFF::IMAGE_SYM_UNDEFINED:
+ // Check storage classes.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+ Result = 'w';
+ return object_error::success; // Don't do ::toupper.
+ } else
+ ret = 'u';
+ break;
+ case COFF::IMAGE_SYM_ABSOLUTE:
+ ret = 'a';
+ break;
+ case COFF::IMAGE_SYM_DEBUG:
+ ret = 'n';
+ break;
+ default:
+ // Check section type.
+ if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ ret = 't';
+ else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ
+ && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ ret = 'r';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ ret = 'd';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ ret = 'b';
+ else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
+ ret = 'i';
+
+ // Check for section symbol.
+ else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+ && symb->Value == 0)
+ ret = 's';
+ }
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ ret = ::toupper(ret);
+
+ Result = ret;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
+ Result = false;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
+ SectionRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ sec += 1;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ StringRef name;
+ if (sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ name = sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ name = StringRef(sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (name[0] == '/') {
+ uint32_t Offset;
+ name.substr(1).getAsInteger(10, Offset);
+ if (error_code ec = getString(Offset, name))
+ return ec;
+ }
+
+ Result = name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->VirtualAddress;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->SizeOfRawData;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ // The only thing that we need to verify is that the contents are contained
+ // within the file bounds. We don't need to make sure it doesn't cover other
+ // data, as there's nothing that says that is not allowed.
+ uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
+ uintptr_t con_end = con_start + sec->SizeOfRawData;
+ if (con_end >= uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Result = StringRef(reinterpret_cast<const char*>(con_start),
+ sec->SizeOfRawData);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(Binary::isCOFF, Object, ec) {
+ // Check that we at least have enough room for a header.
+ if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
+
+ // The actual starting location of the COFF header in the file. This can be
+ // non-zero in PE/COFF files.
+ uint64_t HeaderStart = 0;
+
+ // Check if this is a PE/COFF file.
+ if (base()[0] == 0x4d && base()[1] == 0x5a) {
+ // PE/COFF, seek through MS-DOS compatibility stub and 4-byte
+ // PE signature to find 'normal' COFF header.
+ if (!checkSize(Data, ec, 0x3c + 8)) return;
+ HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ // Check the PE header. ("PE\0\0")
+ if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ HeaderStart += 4; // Skip the PE Header.
+ }
+
+ Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+ if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+ return;
+
+ SectionTable =
+ reinterpret_cast<const coff_section *>( base()
+ + HeaderStart
+ + sizeof(coff_file_header)
+ + Header->SizeOfOptionalHeader);
+ if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+ Header->NumberOfSections * sizeof(coff_section)))
+ return;
+
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
+
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
+
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
+
+ ec = object_error::success;
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
+ // The symbol table ends where the string table begins.
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(StringTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SectionTable);
+ return section_iterator(SectionRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::end_sections() const {
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+ return section_iterator(SectionRef(ret, this));
+}
+
+uint8_t COFFObjectFile::getBytesInAddress() const {
+ return getArch() == Triple::x86_64 ? 8 : 4;
+}
+
+StringRef COFFObjectFile::getFileFormatName() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "COFF-i386";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "COFF-x86-64";
+ default:
+ return "COFF-<unknown arch>";
+ }
+}
+
+unsigned COFFObjectFile::getArch() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return Triple::x86;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+error_code COFFObjectFile::getSection(int32_t index,
+ const coff_section *&Result) const {
+ // Check for special index values.
+ if (index == COFF::IMAGE_SYM_UNDEFINED ||
+ index == COFF::IMAGE_SYM_ABSOLUTE ||
+ index == COFF::IMAGE_SYM_DEBUG)
+ Result = NULL;
+ else if (index > 0 && index <= Header->NumberOfSections)
+ // We already verified the section table data, so no need to check again.
+ Result = SectionTable + (index - 1);
+ else
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getString(uint32_t offset,
+ StringRef &Result) const {
+ if (StringTableSize <= 4)
+ // Tried to get a string from an empty string table.
+ return object_error::parse_failed;
+ if (offset >= StringTableSize)
+ return object_error::unexpected_eof;
+ Result = StringRef(StringTable + offset);
+ return object_error::success;
+}
+
+namespace llvm {
+
+ ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
+ error_code ec;
+ return new COFFObjectFile(Object, ec);
+ }
+
+} // end namespace llvm
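
A brief, hedged example for the factory at the end of this file (illustrative only): build a COFF reader over an already-loaded buffer and report the format string from the accessors implemented above. Note that the factory discards the constructor's error_code, so a caller written this way cannot observe parse failures; describeCOFF is a hypothetical name.

// Illustrative helper over the factory above. Takes ownership of Buf,
// since the Binary destructor deletes the underlying MemoryBuffer.
static llvm::StringRef describeCOFF(llvm::MemoryBuffer *Buf) {
  llvm::OwningPtr<llvm::object::ObjectFile> Obj(
      llvm::object::ObjectFile::createCOFFObjectFile(Buf));
  // e.g. "COFF-i386" or "COFF-x86-64"; these are string literals, so the
  // returned StringRef outlives Obj.
  return Obj->getFileFormatName();
}
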
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
new file mode 100644
index 000000000000..e2ff4dfc0384
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -0,0 +1,740 @@
+//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <limits>
+#include <utility>
+
+using namespace llvm;
+using namespace object;
+
+// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
+namespace {
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelperCommon {
+ typedef support::detail::packed_endian_specific_integral
+ <uint16_t, target_endianness, support::aligned> Elf_Half;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Word;
+ typedef support::detail::packed_endian_specific_integral
+ <int32_t, target_endianness, support::aligned> Elf_Sword;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Xword;
+ typedef support::detail::packed_endian_specific_integral
+ <int64_t, target_endianness, support::aligned> Elf_Sxword;
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct ELFDataTypeTypedefHelper;
+
+/// ELF 32bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, false>
+ : ELFDataTypeTypedefHelperCommon<target_endianness> {
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Off;
+};
+
+/// ELF 64bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, true>
+ : ELFDataTypeTypedefHelperCommon<target_endianness>{
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Off;
+};
+}
+
+// I really don't like doing this, but the alternative is copypasta.
+#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+
+ // Section header.
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Word sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Word sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Word sh_addralign;// Section address alignment
+ Elf_Word sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Xword sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Xword sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Xword sh_addralign;// Section address alignment
+ Elf_Xword sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+
+ /// @brief Get the number of entities this section contains if it has any.
+ unsigned getEntityCount() const {
+ if (sh_entsize == 0)
+ return 0;
+ return sh_size / sh_entsize;
+ }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word st_name; // Symbol name (index into string table)
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Word st_size; // Size of the symbol
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+};
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word st_name; // Symbol name (index into string table)
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Xword st_size; // Size of the symbol
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
+ using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+
+ // These accessors and mutators correspond to the ELF32_ST_BIND,
+ // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+ unsigned char getBinding() const { return st_info >> 4; }
+ unsigned char getType() const { return st_info & 0x0f; }
+ void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+ void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+ void setBindingAndType(unsigned char b, unsigned char t) {
+ st_info = (b << 4) + (t & 0x0f);
+ }
+};
+}
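+// Worked example of the st_info packing above (sketch, not part of the
+// original file): with STB_GLOBAL == 1 and STT_FUNC == 2,
+// setBindingAndType(ELF::STB_GLOBAL, ELF::STT_FUNC) stores st_info == 0x12,
+// and getBinding()/getType() recover the two nibbles again.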
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile : public ObjectFile {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+
+ typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
+ typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+
+ struct Elf_Ehdr {
+ unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
+ Elf_Half e_type; // Type of file (see ET_*)
+ Elf_Half e_machine; // Required architecture for this file (see EM_*)
+ Elf_Word e_version; // Must be equal to 1
+ Elf_Addr e_entry; // Address to jump to in order to start program
+ Elf_Off e_phoff; // Program header table's file offset, in bytes
+ Elf_Off e_shoff; // Section header table's file offset, in bytes
+ Elf_Word e_flags; // Processor-specific flags
+ Elf_Half e_ehsize; // Size of ELF header, in bytes
+ Elf_Half e_phentsize;// Size of an entry in the program header table
+ Elf_Half e_phnum; // Number of entries in the program header table
+ Elf_Half e_shentsize;// Size of an entry in the section header table
+ Elf_Half e_shnum; // Number of entries in the section header table
+ Elf_Half e_shstrndx; // Section header table index of section name
+ // string table
+ bool checkMagic() const {
+ return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+ }
+ unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
+ unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+ };
+
+ typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t;
+
+ const Elf_Ehdr *Header;
+ const Elf_Shdr *SectionHeaderTable;
+ const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+ const Elf_Shdr *dot_strtab_sec; // Symbol string table (.strtab).
+ SymbolTableSections_t SymbolTableSections;
+
+ void validateSymbol(DataRefImpl Symb) const;
+ const Elf_Sym *getSymbol(DataRefImpl Symb) const;
+ const Elf_Shdr *getSection(DataRefImpl index) const;
+ const Elf_Shdr *getSection(uint16_t index) const;
+ const char *getString(uint16_t section, uint32_t offset) const;
+ const char *getString(const Elf_Shdr *section, uint32_t offset) const;
+
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const;
+
+public:
+ ELFObjectFile(MemoryBuffer *Object, error_code &ec);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+};
+} // end namespace
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+ ::validateSymbol(DataRefImpl Symb) const {
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+ // FIXME: We really need to do proper error handling in the case of an invalid
+ // input file. Because we don't use exceptions, I think we'll just pass
+ // an error object around.
+ if (!( symb
+ && SymbolTableSection
+ && symb >= (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset)
+ && symb < (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset
+ + SymbolTableSection->sh_size)))
+ // FIXME: Proper error handling.
+ report_fatal_error("Symb must point to a valid symbol!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+
+ ++Symb.d.a;
+ // Check to see if we are at the end of this symbol table.
+ if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
+ // We are at the end. If there are other symbol tables, jump to them.
+ ++Symb.d.b;
+ Symb.d.a = 1; // The 0th symbol in ELF is fake.
+ // Otherwise return the terminator.
+ if (Symb.d.b >= SymbolTableSections.size()) {
+ Symb.d.a = std::numeric_limits<uint32_t>::max();
+ Symb.d.b = std::numeric_limits<uint32_t>::max();
+ }
+ }
+
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_name == 0) {
+ const Elf_Shdr *section = getSection(symb->st_shndx);
+ if (!section)
+ Result = "";
+ else
+ Result = getString(dot_shstrtab_sec, section->sh_name);
+ return object_error::success;
+ }
+
+ // Use the default string table for symbol names (.strtab).
+ Result = getString(dot_strtab_sec, symb->st_name);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (symb->st_shndx) {
+ case ELF::SHN_COMMON:
+ // Undefined symbols have no address yet.
+ case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = symb->st_value;
+ return object_error::success;
+ default: Section = getSection(symb->st_shndx);
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_SECTION:
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
+ case ELF::STT_NOTYPE:
+ Result = symb->st_value;
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_size == 0)
+ Result = UnknownAddressOrSize;
+ else
+ Result = symb->st_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section = getSection(symb->st_shndx);
+
+ char ret = '?';
+
+ if (Section) {
+ switch (Section->sh_type) {
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_DYNAMIC:
+ switch (Section->sh_flags) {
+ case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
+ ret = 't'; break;
+ case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
+ ret = 'd'; break;
+ case ELF::SHF_ALLOC:
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
+ ret = 'r'; break;
+ }
+ break;
+ case ELF::SHT_NOBITS: ret = 'b';
+ }
+ }
+
+ switch (symb->st_shndx) {
+ case ELF::SHN_UNDEF:
+ if (ret == '?')
+ ret = 'U';
+ break;
+ case ELF::SHN_ABS: ret = 'a'; break;
+ case ELF::SHN_COMMON: ret = 'c'; break;
+ }
+
+ switch (symb->getBinding()) {
+ case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
+ case ELF::STB_WEAK:
+ if (symb->st_shndx == ELF::SHN_UNDEF)
+ ret = 'w';
+ else
+ if (symb->getType() == ELF::STT_OBJECT)
+ ret = 'V';
+ else
+ ret = 'W';
+ }
+
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ Result = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".note", 'n')
+ .Default('?');
+ return object_error::success;
+ }
+
+ Result = ret;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ if ( symb->getType() == ELF::STT_FILE
+ || symb->getType() == ELF::STT_SECTION)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
+ const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
+ sec += Header->e_shentsize;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_addr;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ const char *start = (const char*)base() + sec->sh_offset;
+ Result = StringRef(start, sec->sh_size);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & ELF::SHF_EXECINSTR)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+ , error_code &ec)
+ : ObjectFile(Binary::isELF, Object, ec)
+ , SectionHeaderTable(0)
+ , dot_shstrtab_sec(0)
+ , dot_strtab_sec(0) {
+ Header = reinterpret_cast<const Elf_Ehdr *>(base());
+
+ if (Header->e_shoff == 0)
+ return;
+
+ SectionHeaderTable =
+ reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
+ uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
+ if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
+ <= base() + Data->getBufferSize()))
+ // FIXME: Proper error handling.
+ report_fatal_error("Section table goes past end of file!");
+
+ // To find the symbol tables we walk the section table to find SHT_SYMTAB.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_SYMTAB) {
+ SymbolTableSections.push_back(sh);
+ }
+ }
+
+ // Get string table sections.
+ dot_shstrtab_sec = getSection(Header->e_shstrndx);
+ if (dot_shstrtab_sec) {
+ // Verify that the last byte in the string table is a null.
+ if (((const char*)base() + dot_shstrtab_sec->sh_offset)
+ [dot_shstrtab_sec->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+
+ // FIXME: Merge this into the above loop.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_STRTAB) {
+ StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
+ if (SectionName == ".strtab") {
+ if (dot_strtab_sec != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("Already found section named .strtab!");
+ dot_strtab_sec = sh;
+ const char *dot_strtab = (const char*)base() + sh->sh_offset;
+ if (dot_strtab[sh->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ if (SymbolTableSections.size() == 0) {
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ } else {
+ SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+ SymbolData.d.b = 0;
+ }
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_sections() const {
+ DataRefImpl ret;
+ memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_sections() const {
+ DataRefImpl ret;
+ memset(&ret, 0, sizeof(DataRefImpl));
+ ret.p = reinterpret_cast<intptr_t>(base()
+ + Header->e_shoff
+ + (Header->e_shentsize * Header->e_shnum));
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
+ return is64Bits ? 8 : 4;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getFileFormatName() const {
+ switch(Header->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF32-i386";
+ case ELF::EM_X86_64:
+ return "ELF32-x86-64";
+ default:
+ return "ELF32-unknown";
+ }
+ case ELF::ELFCLASS64:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF64-i386";
+ case ELF::EM_X86_64:
+ return "ELF64-x86-64";
+ default:
+ return "ELF64-unknown";
+ }
+ default:
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid ELFCLASS!");
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return Triple::x86;
+ case ELF::EM_X86_64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
+ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
+ return reinterpret_cast<const Elf_Sym *>(
+ base()
+ + sec->sh_offset
+ + (Symb.d.a * sec->sh_entsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = getSection(Symb.d.b);
+ if (sec->sh_type != ELF::SHT_SYMTAB)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid symbol table section!");
+ return sec;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
+ if (index == 0 || index >= ELF::SHN_LORESERVE)
+ return 0;
+ if (!SectionHeaderTable || index >= Header->e_shnum)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid section index!");
+
+ return reinterpret_cast<const Elf_Shdr *>(
+ reinterpret_cast<const char *>(SectionHeaderTable)
+ + (index * Header->e_shentsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(uint16_t section,
+ ELF::Elf32_Word offset) const {
+ return getString(getSection(section), offset);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(const Elf_Shdr *section,
+ ELF::Elf32_Word offset) const {
+ assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
+ if (offset >= section->sh_size)
+ // FIXME: Proper error handling.
+ report_fatal_error("Symbol name offset outside of string table!");
+ return (const char *)base() + section->sh_offset + offset;
+}
+
+// EI_CLASS, EI_DATA.
+static std::pair<unsigned char, unsigned char>
+getElfArchType(MemoryBuffer *Object) {
+ if (Object->getBufferSize() < ELF::EI_NIDENT)
+ return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+ return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+ , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
+}
+
+namespace llvm {
+
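+  // Illustrative call path (Buf is a placeholder for a MemoryBuffer the
+  // caller already owns and has identified as ELF):
+  //
+  //   ObjectFile *O = ObjectFile::createELFObjectFile(Buf);
+  //   StringRef Fmt = O->getFileFormatName();  // e.g. "ELF64-x86-64"
+  //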
+ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, true>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, true>(Object, ec);
+ // FIXME: Proper error handling.
+ report_fatal_error("Not an ELF object file!");
+ }
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
new file mode 100644
index 000000000000..25946257ab5a
--- /dev/null
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -0,0 +1,57 @@
+//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class _object_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+}
+
+const char *_object_error_category::name() const {
+ return "llvm.object";
+}
+
+std::string _object_error_category::message(int ev) const {
+ switch (ev) {
+ case object_error::success: return "Success";
+ case object_error::invalid_file_type:
+ return "The file was not recognized as a valid object file";
+ case object_error::parse_failed:
+ return "Invalid data was encountered while parsing the file";
+ case object_error::unexpected_eof:
+ return "The end of the file was unexpectedly encountered";
+ default:
+ llvm_unreachable("An enumerator of object_error does not have a message "
+ "defined.");
+ }
+}
+
+error_condition _object_error_category::default_error_condition(int ev) const {
+ if (ev == object_error::success)
+ return errc::success;
+ return errc::invalid_argument;
+}
+
+const error_category &object::object_category() {
+ static _object_error_category o;
+ return o;
+}
diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp
new file mode 100644
index 000000000000..9890febfb616
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObject.cpp
@@ -0,0 +1,370 @@
+//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+/* Translation Utilities */
+
+template<typename T>
+static void SwapValue(T &Value) {
+ Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<typename T>
+static void ReadInMemoryStruct(const MachOObject &MOO,
+ StringRef Buffer, uint64_t Base,
+ InMemoryStruct<T> &Res) {
+ typedef T struct_type;
+ uint64_t Size = sizeof(struct_type);
+
+ // Check that the buffer contains the expected data.
+ if (Base + Size > Buffer.size()) {
+ Res = 0;
+ return;
+ }
+
+ // Check whether we can return a direct pointer.
+ struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
+ if (!MOO.isSwappedEndian()) {
+ Res = Ptr;
+ return;
+ }
+
+ // Otherwise, copy the struct and translate the values.
+ Res = *Ptr;
+ SwapStruct(*Res);
+}
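+
+// When no byte swapping is required, the InMemoryStruct above aliases the file
+// buffer directly; otherwise the struct is copied and the matching
+// SwapStruct<T> specialization (defined later in this file) fixes the
+// endianness. A usage sketch (object and index names are illustrative):
+//
+//   InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+//   MOO.ReadSymtabLoadCommand(MOO.getLoadCommandInfo(i), SymtabLC);
+//   uint32_t NumSyms = SymtabLC->NumSymbolTableEntries;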
+
+/* *** */
+
+MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
+ bool Is64Bit_)
+ : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
+ IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+ HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
+ // Load the common header.
+ memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
+ if (IsSwappedEndian) {
+ SwapValue(Header.Magic);
+ SwapValue(Header.CPUType);
+ SwapValue(Header.CPUSubtype);
+ SwapValue(Header.FileType);
+ SwapValue(Header.NumLoadCommands);
+ SwapValue(Header.SizeOfLoadCommands);
+ SwapValue(Header.Flags);
+ }
+
+ if (is64Bit()) {
+ memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
+ sizeof(Header64Ext));
+ if (IsSwappedEndian) {
+ SwapValue(Header64Ext.Reserved);
+ }
+ }
+
+ // Create the load command array if sane.
+ if (getHeader().NumLoadCommands < (1 << 20))
+ LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
+}
+
+MachOObject::~MachOObject() {
+ delete [] LoadCommands;
+}
+
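+// LoadFromBuffer decodes the leading magic bytes as:
+//   FE ED FA CE  big-endian, 32-bit      FE ED FA CF  big-endian, 64-bit
+//   CE FA ED FE  little-endian, 32-bit   CF FA ED FE  little-endian, 64-bit
+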
+MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
+ std::string *ErrorStr) {
+ // First, check the magic value and initialize the basic object info.
+ bool IsLittleEndian = false, Is64Bit = false;
+ StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ if (Magic == "\xFE\xED\xFA\xCE") {
+ } else if (Magic == "\xCE\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ } else if (Magic == "\xFE\xED\xFA\xCF") {
+ Is64Bit = true;
+ } else if (Magic == "\xCF\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ Is64Bit = true;
+ } else {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
+ return 0;
+ }
+
+ // Ensure that at least the full header is present.
+ unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
+ if (Buffer->getBufferSize() < HeaderSize) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
+ return 0;
+ }
+
+ OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
+ Is64Bit));
+
+ // Check for bogus number of load commands.
+ if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
+ return 0;
+ }
+
+ if (ErrorStr) *ErrorStr = "";
+ return Object.take();
+}
+
+StringRef MachOObject::getData(size_t Offset, size_t Size) const {
+ return Buffer->getBuffer().substr(Offset, Size);
+}
+
+void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
+ HasStringTable = true;
+ StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
+ SLC.StringTableSize);
+}
+
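+// Load commands are decoded lazily and cached in order: command N begins where
+// command N-1 ends (the first one right after the header), so requesting a
+// high index fills in every earlier entry on the way.
+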
+const MachOObject::LoadCommandInfo &
+MachOObject::getLoadCommandInfo(unsigned Index) const {
+ assert(Index < getHeader().NumLoadCommands && "Invalid index!");
+
+ // Load the command, if necessary.
+ if (Index >= NumLoadedCommands) {
+ uint64_t Offset;
+ if (Index == 0) {
+ Offset = getHeaderSize();
+ } else {
+ const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
+ Offset = Prev.Offset + Prev.Command.Size;
+ }
+
+ LoadCommandInfo &Info = LoadCommands[Index];
+ memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
+ sizeof(macho::LoadCommand));
+ if (IsSwappedEndian) {
+ SwapValue(Info.Command.Type);
+ SwapValue(Info.Command.Size);
+ }
+ Info.Offset = Offset;
+ NumLoadedCommands = Index + 1;
+ }
+
+ return LoadCommands[Index];
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.SymbolTableOffset);
+ SwapValue(Value.NumSymbolTableEntries);
+ SwapValue(Value.StringTableOffset);
+ SwapValue(Value.StringTableSize);
+}
+void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.LocalSymbolsIndex);
+ SwapValue(Value.NumLocalSymbols);
+ SwapValue(Value.ExternalSymbolsIndex);
+ SwapValue(Value.NumExternalSymbols);
+ SwapValue(Value.UndefinedSymbolsIndex);
+ SwapValue(Value.NumUndefinedSymbols);
+ SwapValue(Value.TOCOffset);
+ SwapValue(Value.NumTOCEntries);
+ SwapValue(Value.ModuleTableOffset);
+ SwapValue(Value.NumModuleTableEntries);
+ SwapValue(Value.ReferenceSymbolTableOffset);
+ SwapValue(Value.NumReferencedSymbolTableEntries);
+ SwapValue(Value.IndirectSymbolTableOffset);
+ SwapValue(Value.NumIndirectSymbolTableEntries);
+ SwapValue(Value.ExternalRelocationTableOffset);
+ SwapValue(Value.NumExternalRelocationTableEntries);
+ SwapValue(Value.LocalRelocationTableOffset);
+ SwapValue(Value.NumLocalRelocationTableEntries);
+}
+void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
+ SwapValue(Value.Index);
+}
+void
+MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+ unsigned Index,
+ InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
+ uint64_t Offset = (DLC.IndirectSymbolTableOffset +
+ Index * sizeof(macho::IndirectSymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+
+template<>
+void SwapStruct(macho::Section &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+}
+void MachOObject::ReadSection(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Index * sizeof(macho::Section));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Section64 &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+ SwapValue(Value.Reserved3);
+}
+void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section64> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment64 &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Index * sizeof(macho::Section64));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::RelocationEntry &Value) {
+ SwapValue(Value.Word0);
+ SwapValue(Value.Word1);
+}
+void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint64_t Offset = (RelocationTableOffset +
+ Index * sizeof(macho::RelocationEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::SymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::Symbol64TableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+/* *** */
+// Object Dumping Facilities
+void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
+void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
+
+void MachOObject::printHeader(raw_ostream &O) const {
+ O << "('cputype', " << Header.CPUType << ")\n";
+ O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
+ O << "('filetype', " << Header.FileType << ")\n";
+ O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
+ O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
+ O << "('flag', " << Header.Flags << ")\n";
+
+ // Print extended header if 64-bit.
+ if (is64Bit())
+ O << "('reserved', " << Header64Ext.Reserved << ")\n";
+}
+
+void MachOObject::print(raw_ostream &O) const {
+ O << "Header:\n";
+ printHeader(O);
+ O << "Load Commands:\n";
+
+ O << "Buffer:\n";
+}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
new file mode 100644
index 000000000000..26a6e136d753
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -0,0 +1,469 @@
+//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOObjectFile class, which binds the MachOObject
+// class to the generic ObjectFile wrapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MachO.h"
+
+#include <cctype>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace object;
+
+namespace llvm {
+
+typedef MachOObject::LoadCommandInfo LoadCommandInfo;
+
+class MachOObjectFile : public ObjectFile {
+public:
+ MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec)
+ : ObjectFile(Binary::isMachO, Object, ec),
+ MachOObj(MOO),
+ RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
+
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+ bool &Result) const;
+
+private:
+ MachOObject *MachOObj;
+ mutable uint32_t RegisteredStringTable;
+
+ void moveToNextSection(DataRefImpl &DRI) const;
+ void getSymbolTableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+ void getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
+ void moveToNextSymbol(DataRefImpl &DRI) const;
+ void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
+ void getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const;
+};
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ error_code ec;
+ std::string Err;
+ MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
+ if (!MachOObj)
+ return NULL;
+ return new MachOObjectFile(Buffer, MachOObj, ec);
+}
+
+/*===-- Symbols -----------------------------------------------------------===*/
+
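+// DataRefImpl encoding used by this file: d.a indexes the load command being
+// walked and d.b indexes the symbol (or, for sections, the section) within
+// that command; end iterators set d.a to NumLoadCommands.
+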
+void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Symtab) {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
+
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
+ SymbolRef &Result) const {
+ DRI.d.b++;
+ moveToNextSymbol(DRI);
+ Result = SymbolRef(DRI, this);
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
+ char &Result) const {
+ uint8_t Type, Flags;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ }
+
+ char Char;
+ switch (Type & macho::STF_TypeMask) {
+ case macho::STT_Undefined:
+ Char = 'u';
+ break;
+ case macho::STT_Absolute:
+ case macho::STT_Section:
+ Char = 's';
+ break;
+ default:
+ Char = '?';
+ break;
+ }
+
+ if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
+ Char = toupper(Char);
+ Result = Char;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ }
+ return object_error::success;
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
+ // DRI.d.a = load command index; DRI.d.b = symbol index within that command.
+ DataRefImpl DRI;
+ DRI.d.a = DRI.d.b = 0;
+ moveToNextSymbol(DRI);
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ DRI.d.b = 0;
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+
+/*===-- Sections ----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
+ MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
+ if (DRI.d.b < SegmentLoadCmd->NumSections)
+ return;
+ } else if (LCI.Command.Type == macho::LCT_Segment64) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
+ MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
+ if (DRI.d.b < Segment64LoadCmd->NumSections)
+ return;
+ }
+
+ DRI.d.a++;
+ DRI.d.b = 0;
+ }
+}
+
+error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
+ SectionRef &Result) const {
+ DRI.d.b++;
+ moveToNextSection(DRI);
+ Result = SectionRef(DRI, this);
+ return object_error::success;
+}
+
+void
+MachOObjectFile::getSection(DataRefImpl DRI,
+ InMemoryStruct<macho::Section> &Res) const {
+ InMemoryStruct<macho::SegmentLoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+ MachOObj->ReadSection(LCI, DRI.d.b, Res);
+}
+
+void
+MachOObjectFile::getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ MachOObj->ReadSection64(LCI, DRI.d.b, Res);
+}
+
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment64)
+ return true;
+ assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+ return false;
+}
+
+error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
+ StringRef &Result) const {
+ // FIXME: thread safety.
+ static char result[34];
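+  // 34 bytes = 16-byte segment name + ',' + 16-byte section name + NUL. This
+  // assumes both fixed-width Mach-O name fields are NUL-terminated, which
+  // holds for names shorter than 16 characters.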
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section64> Sect;
+ MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ } else {
+ InMemoryStruct<macho::SegmentLoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section> Sect;
+ MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ }
+ Result = StringRef(result);
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Address;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Address;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Size;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Size;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
+ bool &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ }
+ return object_error::success;
+}
+
+ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = DRI.d.b = 0;
+ moveToNextSection(DRI);
+ return section_iterator(SectionRef(DRI, this));
+}
+
+ObjectFile::section_iterator MachOObjectFile::end_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+ DRI.d.b = 0;
+ return section_iterator(SectionRef(DRI, this));
+}
+
+/*===-- Miscellaneous -----------------------------------------------------===*/
+
+uint8_t MachOObjectFile::getBytesInAddress() const {
+ return MachOObj->is64Bit() ? 8 : 4;
+}
+
+StringRef MachOObjectFile::getFileFormatName() const {
+ if (!MachOObj->is64Bit()) {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return "Mach-O 32-bit i386";
+ case llvm::MachO::CPUTypeARM:
+ return "Mach-O arm";
+ case llvm::MachO::CPUTypePowerPC:
+ return "Mach-O 32-bit ppc";
+ default:
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+ "64-bit object file when we're not 64-bit?");
+ return "Mach-O 32-bit unknown";
+ }
+ }
+
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeX86_64:
+ return "Mach-O 64-bit x86-64";
+ case llvm::MachO::CPUTypePowerPC64:
+ return "Mach-O 64-bit ppc64";
+ default:
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) != 0 &&
+ "32-bit object file when we're 64-bit?");
+ return "Mach-O 64-bit unknown";
+ }
+}
+
+unsigned MachOObjectFile::getArch() const {
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ return Triple::x86;
+ case llvm::MachO::CPUTypeX86_64:
+ return Triple::x86_64;
+ case llvm::MachO::CPUTypeARM:
+ return Triple::arm;
+ case llvm::MachO::CPUTypePowerPC:
+ return Triple::ppc;
+ case llvm::MachO::CPUTypePowerPC64:
+ return Triple::ppc64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+} // end namespace llvm
+
diff --git a/contrib/llvm/lib/Object/Makefile b/contrib/llvm/lib/Object/Makefile
new file mode 100644
index 000000000000..79388dc97f1a
--- /dev/null
+++ b/contrib/llvm/lib/Object/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMObject
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
new file mode 100644
index 000000000000..9a373ad21bd2
--- /dev/null
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -0,0 +1,68 @@
+//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings to the file-format-independent object
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm-c/Object.h"
+
+using namespace llvm;
+using namespace object;
+
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
+ return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
+}
+
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
+ delete unwrap(ObjectFile);
+}
+
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
+ ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections();
+ return wrap(new ObjectFile::section_iterator(SI));
+}
+
+void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSectionIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
+}
+
+const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getContents(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
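+
+// Sketch of the C API from a client's perspective (buffer creation and error
+// handling elided; MemBuf is a placeholder LLVMMemoryBufferRef):
+//
+//   LLVMObjectFileRef OF = LLVMCreateObjectFile(MemBuf);
+//   LLVMSectionIteratorRef SI = LLVMGetSections(OF);
+//   while (!LLVMIsSectionIteratorAtEnd(OF, SI)) {
+//     const char *Name = LLVMGetSectionName(SI);
+//     uint64_t Size = LLVMGetSectionSize(SI);
+//     LLVMMoveToNextSection(SI);
+//   }
+//   LLVMDisposeSectionIterator(SI);
+//   LLVMDisposeObjectFile(OF);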
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
new file mode 100644
index 000000000000..a7798df33fe5
--- /dev/null
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -0,0 +1,61 @@
+//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+using namespace object;
+
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ : Binary(Type, source) {
+}
+
+ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
+ if (!Object || Object->getBufferSize() < 64)
+ return 0;
+ sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
+ static_cast<unsigned>(Object->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ return createELFObjectFile(Object);
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ return createMachOObjectFile(Object);
+ case sys::COFF_FileType:
+ return createCOFFObjectFile(Object);
+ default:
+ llvm_unreachable("Unknown Object File Type");
+ }
+}
+
+ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+ return NULL;
+ return createObjectFile(File.take());
+}
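+
+// Illustrative use of the format-independent interface ("a.out" is a
+// placeholder path; error handling omitted):
+//
+//   error_code ec;
+//   ObjectFile *O = ObjectFile::createObjectFile("a.out");
+//   for (ObjectFile::section_iterator I = O->begin_sections(),
+//                                     E = O->end_sections();
+//        I != E; I.increment(ec)) {
+//     StringRef Name;
+//     I->getName(Name);
+//   }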
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
new file mode 100644
index 000000000000..c64da6e137ea
--- /dev/null
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -0,0 +1,3616 @@
+//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits.h>
+#include <cstring>
+
+using namespace llvm;
+
+#define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
+
+/* Assumed in hexadecimal significand parsing, and conversion to
+ hexadecimal strings. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+
+namespace llvm {
+
+ /* Represents floating point arithmetic semantics. */
+ struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ exponent_t maxExponent;
+
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ exponent_t minExponent;
+
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
+
+ /* True if arithmetic is supported. */
+ unsigned int arithmeticOK;
+ };
+
+ const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
+ const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
+ const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
+ const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
+ const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
+ const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
+
+ // The PowerPC format consists of two doubles. It does not map cleanly
+ // onto the usual format above. For now only storage of constants of
+ // this type is supported, no arithmetic.
+ const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
+
+ /* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+ const unsigned int maxExponent = 16383;
+ const unsigned int maxPrecision = 113;
+ const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+ const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
+ / (351 * integerPartWidth));
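+
+ /* Worked out for a 64-bit integerPart (the usual case): the power bound is
+    16383 + 113 - 1 = 16495, so maxPowerOfFiveParts is
+    2 + (16495 * 815) / (351 * 64) = 2 + 598 = 600 parts. */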
+}
+
+/* A bunch of private, handy routines. */
+
+static inline unsigned int
+partCountForBits(unsigned int bits)
+{
+ return ((bits) + integerPartWidth - 1) / integerPartWidth;
+}
+
+/* Returns 0U-9U. Return values >= 10U are not digits. */
+static inline unsigned int
+decDigitValue(unsigned int c)
+{
+ return c - '0';
+}
+
+static unsigned int
+hexDigitValue(unsigned int c)
+{
+ unsigned int r;
+
+ r = c - '0';
+ if (r <= 9)
+ return r;
+
+ r = c - 'A';
+ if (r <= 5)
+ return r + 10;
+
+ r = c - 'a';
+ if (r <= 5)
+ return r + 10;
+
+ return -1U;
+}
+
+static inline void
+assertArithmeticOK(const llvm::fltSemantics &semantics) {
+ assert(semantics.arithmeticOK &&
+ "Compile-time arithmetic does not support these semantics");
+}
+
+/* Return the value of a decimal exponent of the form
+ [+-]ddddddd.
+
+ If the exponent overflows, returns a large exponent with the
+ appropriate sign. */
+static int
+readExponent(StringRef::iterator begin, StringRef::iterator end)
+{
+ bool isNegative;
+ unsigned int absExponent;
+ const unsigned int overlargeExponent = 24000; /* FIXME. */
+ StringRef::iterator p = begin;
+
+ assert(p != end && "Exponent has no digits");
+
+ isNegative = (*p == '-');
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ absExponent = decDigitValue(*p++);
+ assert(absExponent < 10U && "Invalid character in exponent");
+
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ value += absExponent * 10;
+ if (absExponent >= overlargeExponent) {
+ absExponent = overlargeExponent;
+ p = end; /* outwit assert below */
+ break;
+ }
+ absExponent = value;
+ }
+
+ assert(p == end && "Invalid exponent in exponent");
+
+ if (isNegative)
+ return -(int) absExponent;
+ else
+ return (int) absExponent;
+}
+
+/* This is ugly and needs cleaning up, but I don't immediately see
+ how whilst remaining safe. */
+static int
+totalExponent(StringRef::iterator p, StringRef::iterator end,
+ int exponentAdjustment)
+{
+ int unsignedExponent;
+ bool negative, overflow;
+ int exponent = 0;
+
+ assert(p != end && "Exponent has no digits");
+
+ negative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ assert(p != end && "Exponent has no digits");
+ }
+
+ unsignedExponent = 0;
+ overflow = false;
+ for (; p != end; ++p) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ assert(value < 10U && "Invalid character in exponent");
+
+ unsignedExponent = unsignedExponent * 10 + value;
+ if (unsignedExponent > 32767)
+ overflow = true;
+ }
+
+ if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
+ overflow = true;
+
+ if (!overflow) {
+ exponent = unsignedExponent;
+ if (negative)
+ exponent = -exponent;
+ exponent += exponentAdjustment;
+ if (exponent > 32767 || exponent < -32768)
+ overflow = true;
+ }
+
+ if (overflow)
+ exponent = negative ? -32768: 32767;
+
+ return exponent;
+}
+
+static StringRef::iterator
+skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
+ StringRef::iterator *dot)
+{
+ StringRef::iterator p = begin;
+ *dot = end;
+ while (p != end && *p == '0')
+ p++;
+
+ if (p != end && *p == '.') {
+ *dot = p++;
+
+ assert(end - begin != 1 && "Significand has no digits");
+
+ while (p != end && *p == '0')
+ p++;
+ }
+
+ return p;
+}
+
+/* Given a normal decimal floating point number of the form
+
+ dddd.dddd[eE][+-]ddd
+
+ where the decimal point and exponent are optional, fill out the
+ structure D. Exponent is appropriate if the significand is
+ treated as an integer, and normalizedExponent if the significand
+ is taken to have the decimal point after a single leading
+ non-zero digit.
+
+ If the value is zero, V->firstSigDigit points to a non-digit, and
+ the return exponent is zero.
+*/
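+/* For example, interpreting "12.34e5" leaves firstSigDigit at the '1', with
+   exponent == 3 (i.e. 1234 * 10^3) and normalizedExponent == 6
+   (i.e. 1.234 * 10^6). */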
+struct decimalInfo {
+ const char *firstSigDigit;
+ const char *lastSigDigit;
+ int exponent;
+ int normalizedExponent;
+};
+
+static void
+interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
+ decimalInfo *D)
+{
+ StringRef::iterator dot = end;
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
+
+ D->firstSigDigit = p;
+ D->exponent = 0;
+ D->normalizedExponent = 0;
+
+ for (; p != end; ++p) {
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end)
+ break;
+ }
+ if (decDigitValue(*p) >= 10U)
+ break;
+ }
+
+ if (p != end) {
+ assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* p points to the first non-digit in the string */
+ D->exponent = readExponent(p + 1, end);
+
+ /* Implied decimal point? */
+ if (dot == end)
+ dot = p;
+ }
+
+ /* If number is all zeroes accept any exponent. */
+ if (p != D->firstSigDigit) {
+ /* Drop insignificant trailing zeroes. */
+ if (p != begin) {
+ do
+ do
+ p--;
+ while (p != begin && *p == '0');
+ while (p != begin && *p == '.');
+ }
+
+ /* Adjust the exponents for any decimal point. */
+ D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
+ D->normalizedExponent = (D->exponent +
+ static_cast<exponent_t>((p - D->firstSigDigit)
+ - (dot > D->firstSigDigit && dot < p)));
+ }
+
+ D->lastSigDigit = p;
+}
+
+/* Return the trailing fraction of a hexadecimal number.
+ DIGITVALUE is the first hex digit of the fraction, P points to
+ the next digit. */
+static lostFraction
+trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
+ unsigned int digitValue)
+{
+ unsigned int hexDigit;
+
+ /* If the first trailing digit isn't 0 or 8 we can work out the
+ fraction immediately. */
+ if (digitValue > 8)
+ return lfMoreThanHalf;
+ else if (digitValue < 8 && digitValue > 0)
+ return lfLessThanHalf;
+
+ /* Otherwise we need to find the first non-zero digit. */
+ while (*p == '0')
+ p++;
+
+ assert(p != end && "Invalid trailing hexadecimal fraction!");
+
+ hexDigit = hexDigitValue(*p);
+
+ /* If we ran off the end it is exactly zero or one-half, otherwise
+ a little more. */
+ if (hexDigit == -1U)
+ return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
+ else
+ return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
+}
+
+/* Return the fraction lost were a bignum truncated losing the least
+ significant BITS bits. */
+static lostFraction
+lostFractionThroughTruncation(const integerPart *parts,
+ unsigned int partCount,
+ unsigned int bits)
+{
+ unsigned int lsb;
+
+ lsb = APInt::tcLSB(parts, partCount);
+
+ /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ if (bits <= lsb)
+ return lfExactlyZero;
+ if (bits == lsb + 1)
+ return lfExactlyHalf;
+ if (bits <= partCount * integerPartWidth &&
+ APInt::tcExtractBit(parts, bits - 1))
+ return lfMoreThanHalf;
+
+ return lfLessThanHalf;
+}
+
+/* Shift DST right BITS bits noting lost fraction. */
+static lostFraction
+shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
+{
+ lostFraction lost_fraction;
+
+ lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
+
+ APInt::tcShiftRight(dst, parts, bits);
+
+ return lost_fraction;
+}
+
+/* Combine the effect of two lost fractions. */
+static lostFraction
+combineLostFractions(lostFraction moreSignificant,
+ lostFraction lessSignificant)
+{
+ if (lessSignificant != lfExactlyZero) {
+ if (moreSignificant == lfExactlyZero)
+ moreSignificant = lfLessThanHalf;
+ else if (moreSignificant == lfExactlyHalf)
+ moreSignificant = lfMoreThanHalf;
+ }
+
+ return moreSignificant;
+}
+
+/* The error from the true value, in half-ulps, on multiplying two
+ floating point numbers, which differ from the value they
+ approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+ than the returned value.
+
+ See "How to Read Floating Point Numbers Accurately" by William D
+ Clinger. */
+static unsigned int
+HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+{
+ assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+ if (HUerr1 + HUerr2 == 0)
+ return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
+ else
+ return inexactMultiply + 2 * (HUerr1 + HUerr2);
+}
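+
+/* For instance, multiplying two operands each within one half-ulp of their
+   true values, where the multiplication itself is inexact, is bounded by
+   HUerrBound(true, 1, 1) = 1 + 2 * (1 + 1) = 5 half-ulps (strictly less). */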
+
+/* The number of ulps from the boundary (zero, or half if ISNEAREST)
+ when the least significant BITS are truncated. BITS cannot be
+ zero. */
+static integerPart
+ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+{
+ unsigned int count, partBits;
+ integerPart part, boundary;
+
+ assert(bits != 0);
+
+ bits--;
+ count = bits / integerPartWidth;
+ partBits = bits % integerPartWidth + 1;
+
+ part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+ if (isNearest)
+ boundary = (integerPart) 1 << (partBits - 1);
+ else
+ boundary = 0;
+
+ if (count == 0) {
+ if (part - boundary <= boundary - part)
+ return part - boundary;
+ else
+ return boundary - part;
+ }
+
+ if (part == boundary) {
+ while (--count)
+ if (parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return parts[0];
+ } else if (part == boundary - 1) {
+ while (--count)
+ if (~parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return -parts[0];
+ }
+
+ return ~(integerPart) 0; /* A lot. */
+}
+
+/* Place pow(5, power) in DST, and return the number of parts used.
+ DST must be at least one part larger than size of the answer. */
+static unsigned int
+powerOf5(integerPart *dst, unsigned int power)
+{
+ static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+ 15625, 78125 };
+ integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
+ pow5s[0] = 78125 * 5;
+
+ unsigned int partsCount[16] = { 1 };
+ integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ unsigned int result;
+ assert(power <= maxExponent);
+
+ p1 = dst;
+ p2 = scratch;
+
+ *p1 = firstEightPowers[power & 7];
+ power >>= 3;
+
+ result = 1;
+ pow5 = pow5s;
+
+ for (unsigned int n = 0; power; power >>= 1, n++) {
+ unsigned int pc;
+
+ pc = partsCount[n];
+
+ /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
+ if (pc == 0) {
+ pc = partsCount[n - 1];
+ APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+ pc *= 2;
+ if (pow5[pc - 1] == 0)
+ pc--;
+ partsCount[n] = pc;
+ }
+
+ if (power & 1) {
+ integerPart *tmp;
+
+ APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+ result += pc;
+ if (p2[result - 1] == 0)
+ result--;
+
+ /* Now result is in p1 with partsCount parts and p2 is scratch
+ space. */
+ tmp = p1, p1 = p2, p2 = tmp;
+ }
+
+ pow5 += pc;
+ }
+
+ if (p1 != dst)
+ APInt::tcAssign(dst, p1, result);
+
+ return result;
+}
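+
+/* In other words, powerOf5 handles the low three bits of POWER with the
+   firstEightPowers table and then runs square-and-multiply over the remaining
+   bits: pow5s starts at 5^8, each pass squares the previous block to get
+   5^(8 * 2^n), and that block is multiplied into the result whenever the
+   corresponding bit of POWER is set. */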
+
+/* Zero at the end to avoid modular arithmetic when adding one; used
+ when rounding up during hexadecimal output. */
+static const char hexDigitsLower[] = "0123456789abcdef0";
+static const char hexDigitsUpper[] = "0123456789ABCDEF0";
+static const char infinityL[] = "infinity";
+static const char infinityU[] = "INFINITY";
+static const char NaNL[] = "nan";
+static const char NaNU[] = "NAN";
+
+/* Write out an integerPart in hexadecimal, starting with the most
+ significant nibble. Write out exactly COUNT hexdigits, return
+ COUNT. */
+static unsigned int
+partAsHex (char *dst, integerPart part, unsigned int count,
+ const char *hexDigitChars)
+{
+ unsigned int result = count;
+
+ assert(count != 0 && count <= integerPartWidth / 4);
+
+ part >>= (integerPartWidth - 4 * count);
+ while (count--) {
+ dst[count] = hexDigitChars[part & 0xf];
+ part >>= 4;
+ }
+
+ return result;
+}
+
+/* Write out an unsigned decimal integer. */
+static char *
+writeUnsignedDecimal (char *dst, unsigned int n)
+{
+ char buff[40], *p;
+
+ p = buff;
+ do
+ *p++ = '0' + n % 10;
+ while (n /= 10);
+
+ do
+ *dst++ = *--p;
+ while (p != buff);
+
+ return dst;
+}
+
+/* Write out a signed decimal integer. */
+static char *
+writeSignedDecimal (char *dst, int value)
+{
+ if (value < 0) {
+ *dst++ = '-';
+ dst = writeUnsignedDecimal(dst, -(unsigned) value);
+ } else
+ dst = writeUnsignedDecimal(dst, value);
+
+ return dst;
+}
+
+/* Constructors. */
+void
+APFloat::initialize(const fltSemantics *ourSemantics)
+{
+ unsigned int count;
+
+ semantics = ourSemantics;
+ count = partCount();
+ if (count > 1)
+ significand.parts = new integerPart[count];
+}
+
+void
+APFloat::freeSignificand()
+{
+ if (partCount() > 1)
+ delete [] significand.parts;
+}
+
+void
+APFloat::assign(const APFloat &rhs)
+{
+ assert(semantics == rhs.semantics);
+
+ sign = rhs.sign;
+ category = rhs.category;
+ exponent = rhs.exponent;
+ sign2 = rhs.sign2;
+ exponent2 = rhs.exponent2;
+ if (category == fcNormal || category == fcNaN)
+ copySignificand(rhs);
+}
+
+void
+APFloat::copySignificand(const APFloat &rhs)
+{
+ assert(category == fcNormal || category == fcNaN);
+ assert(rhs.partCount() >= partCount());
+
+ APInt::tcAssign(significandParts(), rhs.significandParts(),
+ partCount());
+}
+
+/* Make this number a NaN, with an arbitrary but deterministic value
+ for the significand. If double or longer, this is a signalling NaN,
+ which may not be ideal. If float, this is QNaN(0). */
+void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
+{
+ category = fcNaN;
+ sign = Negative;
+
+ integerPart *significand = significandParts();
+ unsigned numParts = partCount();
+
+ // Set the significand bits to the fill.
+ if (!fill || fill->getNumWords() < numParts)
+ APInt::tcSet(significand, 0, numParts);
+ if (fill) {
+ APInt::tcAssign(significand, fill->getRawData(),
+ std::min(fill->getNumWords(), numParts));
+
+ // Zero out the excess bits of the significand.
+ unsigned bitsToPreserve = semantics->precision - 1;
+ unsigned part = bitsToPreserve / 64;
+ bitsToPreserve %= 64;
+ significand[part] &= ((1ULL << bitsToPreserve) - 1);
+ for (part++; part != numParts; ++part)
+ significand[part] = 0;
+ }
+
+ unsigned QNaNBit = semantics->precision - 2;
+
+ if (SNaN) {
+ // We always have to clear the QNaN bit to make it an SNaN.
+ APInt::tcClearBit(significand, QNaNBit);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if (APInt::tcIsZero(significand, numParts))
+ APInt::tcSetBit(significand, QNaNBit - 1);
+ } else {
+ // We always have to set the QNaN bit to make it a QNaN.
+ APInt::tcSetBit(significand, QNaNBit);
+ }
+
+ // For x87 extended precision, we want to make a NaN, not a
+ // pseudo-NaN. Maybe we should expose the ability to make
+ // pseudo-NaNs?
+ if (semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significand, QNaNBit + 1);
+}
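+
+/* Illustrative note (not part of the original source): for IEEE single
+ precision (24-bit significand) QNaNBit is 22, the most significant bit
+ of the stored 23-bit fraction, matching the usual quiet-NaN convention;
+ a signalling NaN whose payload would otherwise be all zero gets bit 21
+ set instead so it does not read back as an infinity. */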
+
+APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
+ const APInt *fill) {
+ APFloat value(Sem, uninitialized);
+ value.makeNaN(SNaN, Negative, fill);
+ return value;
+}
+
+APFloat &
+APFloat::operator=(const APFloat &rhs)
+{
+ if (this != &rhs) {
+ if (semantics != rhs.semantics) {
+ freeSignificand();
+ initialize(rhs.semantics);
+ }
+ assign(rhs);
+ }
+
+ return *this;
+}
+
+bool
+APFloat::bitwiseIsEqual(const APFloat &rhs) const {
+ if (this == &rhs)
+ return true;
+ if (semantics != rhs.semantics ||
+ category != rhs.category ||
+ sign != rhs.sign)
+ return false;
+ if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ sign2 != rhs.sign2)
+ return false;
+ if (category==fcZero || category==fcInfinity)
+ return true;
+ else if (category==fcNormal && exponent!=rhs.exponent)
+ return false;
+ else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ exponent2!=rhs.exponent2)
+ return false;
+ else {
+ int i= partCount();
+ const integerPart* p=significandParts();
+ const integerPart* q=rhs.significandParts();
+ for (; i>0; i--, p++, q++) {
+ if (*p != *q)
+ return false;
+ }
+ return true;
+ }
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ sign = 0;
+ zeroSignificand();
+ exponent = ourSemantics.precision - 1;
+ significandParts()[0] = value;
+ normalize(rmNearestTiesToEven, lfExactlyZero);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = fcZero;
+ sign = false;
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ // Allocates storage if necessary but does not initialize it.
+ initialize(&ourSemantics);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics,
+ fltCategory ourCategory, bool negative)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = ourCategory;
+ sign = negative;
+ if (category == fcNormal)
+ category = fcZero;
+ else if (ourCategory == fcNaN)
+ makeNaN();
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
+ : exponent2(0), sign2(0) {
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ convertFromString(text, rmNearestTiesToEven);
+}
+
+APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
+ initialize(rhs.semantics);
+ assign(rhs);
+}
+
+APFloat::~APFloat()
+{
+ freeSignificand();
+}
+
+// Profile - This method 'profiles' an APFloat for use with FoldingSet.
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+ ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+ return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+ return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+ return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+ assert(category == fcNormal || category == fcNaN);
+
+ if (partCount() > 1)
+ return significand.parts;
+ else
+ return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+ category = fcNormal;
+ APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand. */
+void
+APFloat::incrementSignificand()
+{
+ integerPart carry;
+
+ carry = APInt::tcIncrement(significandParts(), partCount());
+
+ /* Our callers should never cause us to overflow. */
+ assert(carry == 0);
+}
+
+/* Add the significand of the RHS. Returns the carry flag. */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag. Returns
+ the borrow flag. */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+ partCount());
+}
+
+/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
+ on to the full-precision result of the multiplication. Returns the
+ lost fraction. */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+ unsigned int omsb; // One, not zero, based MSB.
+ unsigned int partsCount, newPartsCount, precision;
+ integerPart *lhsSignificand;
+ integerPart scratch[4];
+ integerPart *fullSignificand;
+ lostFraction lost_fraction;
+ bool ignored;
+
+ assert(semantics == rhs.semantics);
+
+ precision = semantics->precision;
+ newPartsCount = partCountForBits(precision * 2);
+
+ if (newPartsCount > 4)
+ fullSignificand = new integerPart[newPartsCount];
+ else
+ fullSignificand = scratch;
+
+ lhsSignificand = significandParts();
+ partsCount = partCount();
+
+ APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+ rhs.significandParts(), partsCount, partsCount);
+
+ lost_fraction = lfExactlyZero;
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ exponent += rhs.exponent;
+
+ if (addend) {
+ Significand savedSignificand = significand;
+ const fltSemantics *savedSemantics = semantics;
+ fltSemantics extendedSemantics;
+ opStatus status;
+ unsigned int extendedPrecision;
+
+ /* Normalize our MSB. */
+ extendedPrecision = precision + precision - 1;
+ if (omsb != extendedPrecision) {
+ APInt::tcShiftLeft(fullSignificand, newPartsCount,
+ extendedPrecision - omsb);
+ exponent -= extendedPrecision - omsb;
+ }
+
+ /* Create new semantics. */
+ extendedSemantics = *semantics;
+ extendedSemantics.precision = extendedPrecision;
+
+ if (newPartsCount == 1)
+ significand.part = fullSignificand[0];
+ else
+ significand.parts = fullSignificand;
+ semantics = &extendedSemantics;
+
+ APFloat extendedAddend(*addend);
+ status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
+ assert(status == opOK);
+ lost_fraction = addOrSubtractSignificand(extendedAddend, false);
+
+ /* Restore our state. */
+ if (newPartsCount == 1)
+ fullSignificand[0] = significand.part;
+ significand = savedSignificand;
+ semantics = savedSemantics;
+
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ }
+
+ exponent -= (precision - 1);
+
+ if (omsb > precision) {
+ unsigned int bits, significantParts;
+ lostFraction lf;
+
+ bits = omsb - precision;
+ significantParts = partCountForBits(omsb);
+ lf = shiftRight(fullSignificand, significantParts, bits);
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+ exponent += bits;
+ }
+
+ APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
+
+ if (newPartsCount > 4)
+ delete [] fullSignificand;
+
+ return lost_fraction;
+}
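+
+/* Illustrative note (not part of the original source): assuming 64-bit
+ integerParts, IEEE single (precision 24) needs one part for its 48-bit
+ full product and IEEE quad (precision 113) needs four for its 226-bit
+ product, so both fit the on-stack scratch array; the heap allocation
+ above is only reached for unusually wide semantics. */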
+
+/* Divide the significand of this value by that of the RHS, leaving the
+ quotient in place of the significand. Returns the lost fraction. */
+lostFraction
+APFloat::divideSignificand(const APFloat &rhs)
+{
+ unsigned int bit, i, partsCount;
+ const integerPart *rhsSignificand;
+ integerPart *lhsSignificand, *dividend, *divisor;
+ integerPart scratch[4];
+ lostFraction lost_fraction;
+
+ assert(semantics == rhs.semantics);
+
+ lhsSignificand = significandParts();
+ rhsSignificand = rhs.significandParts();
+ partsCount = partCount();
+
+ if (partsCount > 2)
+ dividend = new integerPart[partsCount * 2];
+ else
+ dividend = scratch;
+
+ divisor = dividend + partsCount;
+
+ /* Copy the dividend and divisor as they will be modified in-place. */
+ for (i = 0; i < partsCount; i++) {
+ dividend[i] = lhsSignificand[i];
+ divisor[i] = rhsSignificand[i];
+ lhsSignificand[i] = 0;
+ }
+
+ exponent -= rhs.exponent;
+
+ unsigned int precision = semantics->precision;
+
+ /* Normalize the divisor. */
+ bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
+ if (bit) {
+ exponent += bit;
+ APInt::tcShiftLeft(divisor, partsCount, bit);
+ }
+
+ /* Normalize the dividend. */
+ bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
+ if (bit) {
+ exponent -= bit;
+ APInt::tcShiftLeft(dividend, partsCount, bit);
+ }
+
+ /* Ensure the dividend >= divisor initially for the loop below.
+ Incidentally, this means that the division loop below is
+ guaranteed to set the integer bit to one. */
+ if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+ exponent--;
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
+ }
+
+ /* Long division. */
+ for (bit = precision; bit; bit -= 1) {
+ if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+ APInt::tcSubtract(dividend, divisor, 0, partsCount);
+ APInt::tcSetBit(lhsSignificand, bit - 1);
+ }
+
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ }
+
+ /* Figure out the lost fraction. */
+ int cmp = APInt::tcCompare(dividend, divisor, partsCount);
+
+ if (cmp > 0)
+ lost_fraction = lfMoreThanHalf;
+ else if (cmp == 0)
+ lost_fraction = lfExactlyHalf;
+ else if (APInt::tcIsZero(dividend, partsCount))
+ lost_fraction = lfExactlyZero;
+ else
+ lost_fraction = lfLessThanHalf;
+
+ if (partsCount > 2)
+ delete [] dividend;
+
+ return lost_fraction;
+}
+
+unsigned int
+APFloat::significandMSB() const
+{
+ return APInt::tcMSB(significandParts(), partCount());
+}
+
+unsigned int
+APFloat::significandLSB() const
+{
+ return APInt::tcLSB(significandParts(), partCount());
+}
+
+/* Note that a zero result is NOT normalized to fcZero. */
+lostFraction
+APFloat::shiftSignificandRight(unsigned int bits)
+{
+ /* Our exponent should not overflow. */
+ assert((exponent_t) (exponent + bits) >= exponent);
+
+ exponent += bits;
+
+ return shiftRight(significandParts(), partCount(), bits);
+}
+
+/* Shift the significand left BITS bits, subtract BITS from its exponent. */
+void
+APFloat::shiftSignificandLeft(unsigned int bits)
+{
+ assert(bits < semantics->precision);
+
+ if (bits) {
+ unsigned int partsCount = partCount();
+
+ APInt::tcShiftLeft(significandParts(), partsCount, bits);
+ exponent -= bits;
+
+ assert(!APInt::tcIsZero(significandParts(), partsCount));
+ }
+}
+
+APFloat::cmpResult
+APFloat::compareAbsoluteValue(const APFloat &rhs) const
+{
+ int compare;
+
+ assert(semantics == rhs.semantics);
+ assert(category == fcNormal);
+ assert(rhs.category == fcNormal);
+
+ compare = exponent - rhs.exponent;
+
+ /* If exponents are equal, do an unsigned bignum comparison of the
+ significands. */
+ if (compare == 0)
+ compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
+ partCount());
+
+ if (compare > 0)
+ return cmpGreaterThan;
+ else if (compare < 0)
+ return cmpLessThan;
+ else
+ return cmpEqual;
+}
+
+/* Handle overflow. Sign is preserved. We either become infinity or
+ the largest finite number. */
+APFloat::opStatus
+APFloat::handleOverflow(roundingMode rounding_mode)
+{
+ /* Infinity? */
+ if (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway ||
+ (rounding_mode == rmTowardPositive && !sign) ||
+ (rounding_mode == rmTowardNegative && sign)) {
+ category = fcInfinity;
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ /* Otherwise we become the largest finite number. */
+ category = fcNormal;
+ exponent = semantics->maxExponent;
+ APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
+
+ return opInexact;
+}
+
+/* Returns TRUE if, when truncating the current number, with BIT the
+ new LSB, with the given lost fraction and rounding mode, the result
+ would need to be rounded away from zero (i.e., by increasing the
+ significand). This routine must work for fcZero of both signs, and
+ fcNormal numbers. */
+bool
+APFloat::roundAwayFromZero(roundingMode rounding_mode,
+ lostFraction lost_fraction,
+ unsigned int bit) const
+{
+ /* NaNs and infinities should not have lost fractions. */
+ assert(category == fcNormal || category == fcZero);
+
+ /* Current callers never pass this so we don't handle it. */
+ assert(lost_fraction != lfExactlyZero);
+
+ switch (rounding_mode) {
+ default:
+ llvm_unreachable(0);
+
+ case rmNearestTiesToAway:
+ return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+
+ case rmNearestTiesToEven:
+ if (lost_fraction == lfMoreThanHalf)
+ return true;
+
+ /* Our zeroes don't have a significand to test. */
+ if (lost_fraction == lfExactlyHalf && category != fcZero)
+ return APInt::tcExtractBit(significandParts(), bit);
+
+ return false;
+
+ case rmTowardZero:
+ return false;
+
+ case rmTowardPositive:
+ return sign == false;
+
+ case rmTowardNegative:
+ return sign == true;
+ }
+}
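+
+/* Illustrative summary (not part of the original source): under
+ rmNearestTiesToEven a lost fraction of lfMoreThanHalf always rounds
+ away from zero, lfLessThanHalf never does, and lfExactlyHalf rounds
+ away only when the bit that becomes the new LSB is already 1, which is
+ exactly what keeps the rounded result even. */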
+
+APFloat::opStatus
+APFloat::normalize(roundingMode rounding_mode,
+ lostFraction lost_fraction)
+{
+ unsigned int omsb; /* One, not zero, based MSB. */
+ int exponentChange;
+
+ if (category != fcNormal)
+ return opOK;
+
+ /* Before rounding normalize the exponent of fcNormal numbers. */
+ omsb = significandMSB() + 1;
+
+ if (omsb) {
+ /* OMSB is numbered from 1. We want to place it in the integer
+ bit numbered PRECISION if possible, with a compensating change in
+ the exponent. */
+ exponentChange = omsb - semantics->precision;
+
+ /* If the resulting exponent is too high, overflow according to
+ the rounding mode. */
+ if (exponent + exponentChange > semantics->maxExponent)
+ return handleOverflow(rounding_mode);
+
+ /* Subnormal numbers have exponent minExponent, and their MSB
+ is forced based on that. */
+ if (exponent + exponentChange < semantics->minExponent)
+ exponentChange = semantics->minExponent - exponent;
+
+ /* Shifting left is easy as we don't lose precision. */
+ if (exponentChange < 0) {
+ assert(lost_fraction == lfExactlyZero);
+
+ shiftSignificandLeft(-exponentChange);
+
+ return opOK;
+ }
+
+ if (exponentChange > 0) {
+ lostFraction lf;
+
+ /* Shift right and capture any new lost fraction. */
+ lf = shiftSignificandRight(exponentChange);
+
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+
+ /* Keep OMSB up-to-date. */
+ if (omsb > (unsigned) exponentChange)
+ omsb -= exponentChange;
+ else
+ omsb = 0;
+ }
+ }
+
+ /* Now round the number according to rounding_mode given the lost
+ fraction. */
+
+ /* As specified in IEEE 754, since we do not trap we do not report
+ underflow for exact results. */
+ if (lost_fraction == lfExactlyZero) {
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ return opOK;
+ }
+
+ /* Increment the significand if we're rounding away from zero. */
+ if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+ if (omsb == 0)
+ exponent = semantics->minExponent;
+
+ incrementSignificand();
+ omsb = significandMSB() + 1;
+
+ /* Did the significand increment overflow? */
+ if (omsb == (unsigned) semantics->precision + 1) {
+ /* Renormalize by incrementing the exponent and shifting our
+ significand right one. However if we already have the
+ maximum exponent we overflow to infinity. */
+ if (exponent == semantics->maxExponent) {
+ category = fcInfinity;
+
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ shiftSignificandRight(1);
+
+ return opInexact;
+ }
+ }
+
+ /* The normal case - we were and are not denormal, and any
+ significand increment above didn't overflow. */
+ if (omsb == semantics->precision)
+ return opInexact;
+
+ /* We have a non-zero denormal. */
+ assert(omsb < semantics->precision);
+
+ /* Canonicalize zeroes. */
+ if (omsb == 0)
+ category = fcZero;
+
+ /* The fcZero case is a denormal that underflowed to zero. */
+ return (opStatus) (opUnderflow | opInexact);
+}
+
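+/* Note (not part of the original source): the convolve() helper used in
+ this and the following special-case routines is defined earlier in the
+ file (not shown in this excerpt); it is assumed to pack the two
+ fltCategory values into a single integer so that every pairing of
+ category and rhs.category can be handled by one switch. */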
+APFloat::opStatus
+APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ category = fcInfinity;
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ assign(rhs);
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcZero):
+ /* Sign depends on rounding mode; handled by caller. */
+ return opOK;
+
+ case convolve(fcInfinity, fcInfinity):
+ /* Differently signed infinities can only be validly
+ subtracted. */
+ if (((sign ^ rhs.sign)!=0) != subtract) {
+ makeNaN();
+ return opInvalidOp;
+ }
+
+ return opOK;
+
+ case convolve(fcNormal, fcNormal):
+ return opDivByZero;
+ }
+}
+
+/* Add or subtract two normal numbers. */
+lostFraction
+APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
+{
+ integerPart carry;
+ lostFraction lost_fraction;
+ int bits;
+
+ /* Determine if the operation on the absolute values is effectively
+ an addition or subtraction. */
+ subtract ^= (sign ^ rhs.sign) ? true : false;
+
+ /* Are we bigger exponent-wise than the RHS? */
+ bits = exponent - rhs.exponent;
+
+ /* Subtraction is more subtle than one might naively expect. */
+ if (subtract) {
+ APFloat temp_rhs(rhs);
+ bool reverse;
+
+ if (bits == 0) {
+ reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
+ lost_fraction = lfExactlyZero;
+ } else if (bits > 0) {
+ lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
+ shiftSignificandLeft(1);
+ reverse = false;
+ } else {
+ lost_fraction = shiftSignificandRight(-bits - 1);
+ temp_rhs.shiftSignificandLeft(1);
+ reverse = true;
+ }
+
+ if (reverse) {
+ carry = temp_rhs.subtractSignificand
+ (*this, lost_fraction != lfExactlyZero);
+ copySignificand(temp_rhs);
+ sign = !sign;
+ } else {
+ carry = subtractSignificand
+ (temp_rhs, lost_fraction != lfExactlyZero);
+ }
+
+ /* Invert the lost fraction - it was on the RHS and
+ subtracted. */
+ if (lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if (lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+
+ /* The code above is intended to ensure that no borrow is
+ necessary. */
+ assert(!carry);
+ } else {
+ if (bits > 0) {
+ APFloat temp_rhs(rhs);
+
+ lost_fraction = temp_rhs.shiftSignificandRight(bits);
+ carry = addSignificand(temp_rhs);
+ } else {
+ lost_fraction = shiftSignificandRight(-bits);
+ carry = addSignificand(rhs);
+ }
+
+ /* We have a guard bit; generating a carry cannot happen. */
+ assert(!carry);
+ }
+
+ return lost_fraction;
+}
+
+APFloat::opStatus
+APFloat::multiplySpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ category = fcInfinity;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcZero, fcZero):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcInfinity, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::divideSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ category = fcInfinity;
+ return opDivByZero;
+
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::modSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcInfinity):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+/* Change sign. */
+void
+APFloat::changeSign()
+{
+ /* Look mummy, this one's easy. */
+ sign = !sign;
+}
+
+void
+APFloat::clearSign()
+{
+ /* So is this one. */
+ sign = 0;
+}
+
+void
+APFloat::copySign(const APFloat &rhs)
+{
+ /* And this one. */
+ sign = rhs.sign;
+}
+
+/* Normalized addition or subtraction. */
+APFloat::opStatus
+APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
+ bool subtract)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ fs = addOrSubtractSpecials(rhs, subtract);
+
+ /* This return code means it was not a simple case. */
+ if (fs == opDivByZero) {
+ lostFraction lost_fraction;
+
+ lost_fraction = addOrSubtractSignificand(rhs, subtract);
+ fs = normalize(rounding_mode, lost_fraction);
+
+ /* Can only be zero if we lost no fraction. */
+ assert(category != fcZero || lost_fraction == lfExactlyZero);
+ }
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero) {
+ if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
+ sign = (rounding_mode == rmTowardNegative);
+ }
+
+ return fs;
+}
+
+/* Normalized addition. */
+APFloat::opStatus
+APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, false);
+}
+
+/* Normalized subtraction. */
+APFloat::opStatus
+APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, true);
+}
+
+/* Normalized multiply. */
+APFloat::opStatus
+APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = multiplySpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = multiplySignificand(rhs, 0);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized divide. */
+APFloat::opStatus
+APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = divideSpecials(rhs);
+
+ if (category == fcNormal) {
+ lostFraction lost_fraction = divideSignificand(rhs);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized remainder. This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::remainder(const APFloat &rhs)
+{
+ opStatus fs;
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ assertArithmeticOK(*semantics);
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ return fs;
+}
+
+/* Normalized llvm frem (C fmod).
+ This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+ assertArithmeticOK(*semantics);
+ fs = modSpecials(rhs);
+
+ if (category == fcNormal && rhs.category == fcNormal) {
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmTowardZero, &ignored);
+ if (fs==opInvalidOp)
+ return fs;
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ }
+ return fs;
+}
+
+/* Normalized fused-multiply-add. */
+APFloat::opStatus
+APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
+ const APFloat &addend,
+ roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ /* Post-multiplication sign, before addition. */
+ sign ^= multiplicand.sign;
+
+ /* If and only if all arguments are normal do we need to do an
+ extended-precision calculation. */
+ if (category == fcNormal &&
+ multiplicand.category == fcNormal &&
+ addend.category == fcNormal) {
+ lostFraction lost_fraction;
+
+ lost_fraction = multiplySignificand(multiplicand, &addend);
+ fs = normalize(rounding_mode, lost_fraction);
+ if (lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if (category == fcZero && sign != addend.sign)
+ sign = (rounding_mode == rmTowardNegative);
+ } else {
+ fs = multiplySpecials(multiplicand);
+
+ /* FS can only be opOK or opInvalidOp. There is no more work
+ to do in the latter case. The IEEE-754R standard says it is
+ implementation-defined in this case whether, if ADDEND is a
+ quiet NaN, we raise invalid op; this implementation does so.
+
+ If we need to do the addition we can do so with normal
+ precision. */
+ if (fs == opOK)
+ fs = addOrSubtract(addend, rounding_mode, false);
+ }
+
+ return fs;
+}
+
+/* Comparison requires normalized numbers. */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+ cmpResult result;
+
+ assertArithmeticOK(*semantics);
+ assert(semantics == rhs.semantics);
+
+ switch (convolve(category, rhs.category)) {
+ default:
+ llvm_unreachable(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ return cmpUnordered;
+
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcNormal, fcZero):
+ if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ if (rhs.sign)
+ return cmpGreaterThan;
+ else
+ return cmpLessThan;
+
+ case convolve(fcInfinity, fcInfinity):
+ if (sign == rhs.sign)
+ return cmpEqual;
+ else if (sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcZero, fcZero):
+ return cmpEqual;
+
+ case convolve(fcNormal, fcNormal):
+ break;
+ }
+
+ /* Two normal numbers. Do they have the same sign? */
+ if (sign != rhs.sign) {
+ if (sign)
+ result = cmpLessThan;
+ else
+ result = cmpGreaterThan;
+ } else {
+ /* Compare absolute values; invert result if negative. */
+ result = compareAbsoluteValue(rhs);
+
+ if (sign) {
+ if (result == cmpLessThan)
+ result = cmpGreaterThan;
+ else if (result == cmpGreaterThan)
+ result = cmpLessThan;
+ }
+ }
+
+ return result;
+}
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions. *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will fail to produce
+/// the original value (this is almost the same as return value!=opOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+ roundingMode rounding_mode, bool *losesInfo)
+{
+ lostFraction lostFraction;
+ unsigned int newPartCount, oldPartCount;
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ assertArithmeticOK(toSemantics);
+ lostFraction = lfExactlyZero;
+ newPartCount = partCountForBits(toSemantics.precision + 1);
+ oldPartCount = partCount();
+
+ /* Handle storage complications. If our new form is wider,
+ re-allocate our bit pattern into wider storage. If it is
+ narrower, we ignore the excess parts, but if narrowing to a
+ single part we need to free the old storage.
+ Be careful not to reference significandParts for zeroes
+ and infinities, since it aborts. */
+ if (newPartCount > oldPartCount) {
+ integerPart *newParts;
+ newParts = new integerPart[newPartCount];
+ APInt::tcSet(newParts, 0, newPartCount);
+ if (category==fcNormal || category==fcNaN)
+ APInt::tcAssign(newParts, significandParts(), oldPartCount);
+ freeSignificand();
+ significand.parts = newParts;
+ } else if (newPartCount < oldPartCount) {
+ /* Capture any lost fraction through truncation of parts so we get
+ correct rounding whilst normalizing. */
+ if (category==fcNormal)
+ lostFraction = lostFractionThroughTruncation
+ (significandParts(), oldPartCount, toSemantics.precision);
+ if (newPartCount == 1) {
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
+ }
+ }
+
+ if (category == fcNormal) {
+ /* Re-interpret our bit-pattern. */
+ exponent += toSemantics.precision - semantics->precision;
+ semantics = &toSemantics;
+ fs = normalize(rounding_mode, lostFraction);
+ *losesInfo = (fs != opOK);
+ } else if (category == fcNaN) {
+ int shift = toSemantics.precision - semantics->precision;
+ // Do this now so significandParts gets the right answer
+ const fltSemantics *oldSemantics = semantics;
+ semantics = &toSemantics;
+ *losesInfo = false;
+ // No normalization here, just truncate
+ if (shift>0)
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+ else if (shift < 0) {
+ unsigned ushift = -shift;
+ // Figure out if we are losing information. This happens
+ // if we are shifting out something other than 0s, or if the x87 long
+ // double input did not have its integer bit set (pseudo-NaN), or if the
+ // x87 long double input did not have its QNan bit set (because the x87
+ // hardware sets this bit when converting a lower-precision NaN to
+ // x87 long double).
+ if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
+ *losesInfo = true;
+ if (oldSemantics == &APFloat::x87DoubleExtended &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL)))
+ *losesInfo = true;
+ APInt::tcShiftRight(significandParts(), newPartCount, ushift);
+ }
+ // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+ // does not give you back the same bits. This is dubious, and we
+ // don't currently do it. You're really supposed to get
+ // an invalid operation signal at runtime, but nobody does that.
+ fs = opOK;
+ } else {
+ semantics = &toSemantics;
+ fs = opOK;
+ *losesInfo = false;
+ }
+
+ return fs;
+}
+
+/* Convert a floating point number to an integer according to the
+ rounding mode. If the rounded integer value is out of range this
+ returns an invalid operation exception and the contents of the
+ destination parts are unspecified. If the rounded value is in
+ range but the floating point number is not the exact integer, the C
+ standard doesn't require an inexact exception to be raised. IEEE
+ 854 does require it so we do that.
+
+ Note that for conversions to integer type the C standard requires
+ round-to-zero to always be used. */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode,
+ bool *isExact) const
+{
+ lostFraction lost_fraction;
+ const integerPart *src;
+ unsigned int dstPartsCount, truncatedBits;
+
+ assertArithmeticOK(*semantics);
+
+ *isExact = false;
+
+ /* Handle the three special cases first. */
+ if (category == fcInfinity || category == fcNaN)
+ return opInvalidOp;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcZero) {
+ APInt::tcSet(parts, 0, dstPartsCount);
+ // Negative zero can't be represented as an int.
+ *isExact = !sign;
+ return opOK;
+ }
+
+ src = significandParts();
+
+ /* Step 1: place our absolute value, with any fraction truncated, in
+ the destination. */
+ if (exponent < 0) {
+ /* Our absolute value is less than one; truncate everything. */
+ APInt::tcSet(parts, 0, dstPartsCount);
+ /* For exponent -1 the integer bit represents .5 and is the leftmost
+ truncated bit; for smaller exponents the leftmost truncated bit is 0. */
+ truncatedBits = semantics->precision -1U - exponent;
+ } else {
+ /* We want the most significant (exponent + 1) bits; the rest are
+ truncated. */
+ unsigned int bits = exponent + 1U;
+
+ /* Hopelessly large in magnitude? */
+ if (bits > width)
+ return opInvalidOp;
+
+ if (bits < semantics->precision) {
+ /* We truncate (semantics->precision - bits) bits. */
+ truncatedBits = semantics->precision - bits;
+ APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
+ } else {
+ /* We want at least as many bits as are available. */
+ APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
+ APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
+ truncatedBits = 0;
+ }
+ }
+
+ /* Step 2: work out any lost fraction, and increment the absolute
+ value if we would round away from zero. */
+ if (truncatedBits) {
+ lost_fraction = lostFractionThroughTruncation(src, partCount(),
+ truncatedBits);
+ if (lost_fraction != lfExactlyZero &&
+ roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
+ if (APInt::tcIncrement(parts, dstPartsCount))
+ return opInvalidOp; /* Overflow. */
+ }
+ } else {
+ lost_fraction = lfExactlyZero;
+ }
+
+ /* Step 3: check if we fit in the destination. */
+ unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
+
+ if (sign) {
+ if (!isSigned) {
+ /* Negative numbers cannot be represented as unsigned. */
+ if (omsb != 0)
+ return opInvalidOp;
+ } else {
+ /* It takes omsb bits to represent the unsigned integer value.
+ We lose a bit for the sign, but care is needed as the
+ maximally negative integer is a special case. */
+ if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
+ return opInvalidOp;
+
+ /* This case can happen because of rounding. */
+ if (omsb > width)
+ return opInvalidOp;
+ }
+
+ APInt::tcNegate (parts, dstPartsCount);
+ } else {
+ if (omsb >= width + !isSigned)
+ return opInvalidOp;
+ }
+
+ if (lost_fraction == lfExactlyZero) {
+ *isExact = true;
+ return opOK;
+ } else
+ return opInexact;
+}
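+
+/* Illustrative note (not part of the original source): converting -1.5 to
+ a signed integer with rmTowardZero truncates to 1, negates to -1, and
+ returns opInexact with *isExact false; under rmNearestTiesToEven the
+ exact half rounds to the even magnitude 2, giving -2 instead. */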
+
+/* Same as convertToSignExtendedInteger, except we provide
+ deterministic values in case of an invalid operation exception,
+ namely zero for NaNs and the minimal or maximal value respectively
+ for underflow or overflow.
+ The *isExact output tells whether the result is exact, in the sense
+ that converting it back to the original floating point type produces
+ the original value. This is almost equivalent to result==opOK,
+ except for negative zeroes.
+*/
+APFloat::opStatus
+APFloat::convertToInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ opStatus fs;
+
+ fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
+ isExact);
+
+ if (fs == opInvalidOp) {
+ unsigned int bits, dstPartsCount;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcNaN)
+ bits = 0;
+ else if (sign)
+ bits = isSigned;
+ else
+ bits = width - isSigned;
+
+ APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
+ if (sign && isSigned)
+ APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
+ }
+
+ return fs;
+}
+
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+ an APSInt, whose initial bit-width and signed-ness are used to determine the
+ precision of the conversion.
+ */
+APFloat::opStatus
+APFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ unsigned bitWidth = result.getBitWidth();
+ SmallVector<uint64_t, 4> parts(result.getNumWords());
+ opStatus status = convertToInteger(
+ parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
+ // Keeps the original signed-ness.
+ result = APInt(bitWidth, (unsigned)parts.size(), parts.data());
+ return status;
+}
+
+/* Convert an unsigned integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. The sign of the floating
+ point number is not modified. */
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+ unsigned int srcCount,
+ roundingMode rounding_mode)
+{
+ unsigned int omsb, precision, dstCount;
+ integerPart *dst;
+ lostFraction lost_fraction;
+
+ assertArithmeticOK(*semantics);
+ category = fcNormal;
+ omsb = APInt::tcMSB(src, srcCount) + 1;
+ dst = significandParts();
+ dstCount = partCount();
+ precision = semantics->precision;
+
+ /* We want the most significant PRECISION bits of SRC. There may not
+ be that many; extract what we can. */
+ if (precision <= omsb) {
+ exponent = omsb - 1;
+ lost_fraction = lostFractionThroughTruncation(src, srcCount,
+ omsb - precision);
+ APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+ } else {
+ exponent = precision - 1;
+ lost_fraction = lfExactlyZero;
+ APInt::tcExtract(dst, dstCount, src, omsb, 0);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = Val.getNumWords();
+ APInt api = Val;
+
+ sign = false;
+ if (isSigned && api.isNegative()) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. ISSIGNED is true if the
+ integer is signed, in which case it must be sign-extended. */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+ unsigned int srcCount,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ opStatus status;
+
+ assertArithmeticOK(*semantics);
+ if (isSigned &&
+ APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+ integerPart *copy;
+
+ /* If we're signed and negative negate a copy. */
+ sign = true;
+ copy = new integerPart[srcCount];
+ APInt::tcAssign(copy, src, srcCount);
+ APInt::tcNegate(copy, srcCount);
+ status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+ delete [] copy;
+ } else {
+ sign = false;
+ status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+ }
+
+ return status;
+}
+
+/* FIXME: should this just take a const APInt reference? */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = partCountForBits(width);
+ APInt api = APInt(width, partCount, parts);
+
+ sign = false;
+ if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
+{
+ lostFraction lost_fraction = lfExactlyZero;
+ integerPart *significand;
+ unsigned int bitPos, partsCount;
+ StringRef::iterator dot, firstSignificantDigit;
+
+ zeroSignificand();
+ exponent = 0;
+ category = fcNormal;
+
+ significand = significandParts();
+ partsCount = partCount();
+ bitPos = partsCount * integerPartWidth;
+
+ /* Skip leading zeroes and any (hexa)decimal point. */
+ StringRef::iterator begin = s.begin();
+ StringRef::iterator end = s.end();
+ StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ firstSignificantDigit = p;
+
+ for (; p != end;) {
+ integerPart hex_value;
+
+ if (*p == '.') {
+ assert(dot == end && "String contains multiple dots");
+ dot = p++;
+ if (p == end) {
+ break;
+ }
+ }
+
+ hex_value = hexDigitValue(*p);
+ if (hex_value == -1U) {
+ break;
+ }
+
+ p++;
+
+ if (p == end) {
+ break;
+ } else {
+ /* Store the number whilst 4-bit nibbles remain. */
+ if (bitPos) {
+ bitPos -= 4;
+ hex_value <<= bitPos % integerPartWidth;
+ significand[bitPos / integerPartWidth] |= hex_value;
+ } else {
+ lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+ while (p != end && hexDigitValue(*p) != -1U)
+ p++;
+ break;
+ }
+ }
+ }
+
+ /* Hex floats require an exponent but not a hexadecimal point. */
+ assert(p != end && "Hex strings require an exponent");
+ assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
+ assert(p != begin && "Significand has no digits");
+ assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+ /* Ignore the exponent if we are zero. */
+ if (p != firstSignificantDigit) {
+ int expAdjustment;
+
+ /* Implicit hexadecimal point? */
+ if (dot == end)
+ dot = p;
+
+ /* Calculate the exponent adjustment implicit in the number of
+ significant digits. */
+ expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+ if (expAdjustment < 0)
+ expAdjustment++;
+ expAdjustment = expAdjustment * 4 - 1;
+
+ /* Adjust for writing the significand starting at the most
+ significant nibble. */
+ expAdjustment += semantics->precision;
+ expAdjustment -= partsCount * integerPartWidth;
+
+ /* Adjust for the given exponent. */
+ exponent = totalExponent(p + 1, end, expAdjustment);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode)
+{
+ unsigned int parts, pow5PartCount;
+ fltSemantics calcSemantics = { 32767, -32767, 0, true };
+ integerPart pow5Parts[maxPowerOfFiveParts];
+ bool isNearest;
+
+ isNearest = (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway);
+
+ parts = partCountForBits(semantics->precision + 11);
+
+ /* Calculate pow(5, abs(exp)). */
+ pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+ for (;; parts *= 2) {
+ opStatus sigStatus, powStatus;
+ unsigned int excessPrecision, truncatedBits;
+
+ calcSemantics.precision = parts * integerPartWidth - 1;
+ excessPrecision = calcSemantics.precision - semantics->precision;
+ truncatedBits = excessPrecision;
+
+ APFloat decSig(calcSemantics, fcZero, sign);
+ APFloat pow5(calcSemantics, fcZero, false);
+
+ sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+ rmNearestTiesToEven);
+ powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+ rmNearestTiesToEven);
+ /* Add exp, as 10^n = 5^n * 2^n. */
+ decSig.exponent += exp;
+
+ lostFraction calcLostFraction;
+ integerPart HUerr, HUdistance;
+ unsigned int powHUerr;
+
+ if (exp >= 0) {
+ /* multiplySignificand leaves the precision-th bit set to 1. */
+ calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ powHUerr = powStatus != opOK;
+ } else {
+ calcLostFraction = decSig.divideSignificand(pow5);
+ /* Denormal numbers have less precision. */
+ if (decSig.exponent < semantics->minExponent) {
+ excessPrecision += (semantics->minExponent - decSig.exponent);
+ truncatedBits = excessPrecision;
+ if (excessPrecision > calcSemantics.precision)
+ excessPrecision = calcSemantics.precision;
+ }
+ /* Extra half-ulp lost in reciprocal of exponent. */
+ powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
+ }
+
+ /* Both multiplySignificand and divideSignificand return the
+ result with the integer bit set. */
+ assert(APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+ HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+ powHUerr);
+ HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+ excessPrecision, isNearest);
+
+ /* Are we guaranteed to round correctly if we truncate? */
+ if (HUdistance >= HUerr) {
+ APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+ calcSemantics.precision - excessPrecision,
+ excessPrecision);
+ /* Take the exponent of decSig. If we tcExtract-ed fewer bits
+ above we must adjust our exponent to compensate for the
+ implicit right shift. */
+ exponent = (decSig.exponent + semantics->precision
+ - (calcSemantics.precision - excessPrecision));
+ calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+ decSig.partCount(),
+ truncatedBits);
+ return normalize(rounding_mode, calcLostFraction);
+ }
+ }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
+{
+ decimalInfo D;
+ opStatus fs;
+
+ /* Scan the text. */
+ StringRef::iterator p = str.begin();
+ interpretDecimal(p, str.end(), &D);
+
+ /* Handle the quick cases. First the case of no significant digits,
+ i.e. zero, and then exponents that are obviously too large or too
+ small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
+ definitely overflows if
+
+ (exp - 1) * L >= maxExponent
+
+ and definitely underflows to zero where
+
+ (exp + 1) * L <= minExponent - precision
+
+ With integer arithmetic the tightest bounds for L are
+
+ 93/28 < L < 196/59 [ numerator <= 256 ]
+ 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
+ */
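+
+ /* Illustrative check of those bounds (not part of the original source):
+ for IEEE double, maxExponent is 1023 and precision is 53, so the
+ overflow test (exp - 1) * 42039 >= 12655 * 1023 first fires at
+ exp = 309, and indeed d.ddd...*10^309 always exceeds the largest
+ finite double, roughly 1.8e308. */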
+
+ if (decDigitValue(*D.firstSigDigit) >= 10U) {
+ category = fcZero;
+ fs = opOK;
+
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
+ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
+ fs = handleOverflow(rounding_mode);
+
+ /* If it wasn't, then it also wasn't high enough to overflow max
+ during the log-rebasing in the min-exponent check. Check that it
+ won't overflow min in either check, then perform the min-exponent
+ check. */
+ } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
+ (D.normalizedExponent + 1) * 28738 <=
+ 8651 * (semantics->minExponent - (int) semantics->precision)) {
+ /* Underflow to zero and round. */
+ zeroSignificand();
+ fs = normalize(rounding_mode, lfLessThanHalf);
+
+ /* We can finally safely perform the max-exponent check. */
+ } else if ((D.normalizedExponent - 1) * 42039
+ >= 12655 * semantics->maxExponent) {
+ /* Overflow and round. */
+ fs = handleOverflow(rounding_mode);
+ } else {
+ integerPart *decSignificand;
+ unsigned int partCount;
+
+ /* A tight upper bound on number of bits required to hold an
+ N-digit decimal integer is N * 196 / 59. Allocate enough space
+ to hold the full significand, and an extra part required by
+ tcMultiplyPart. */
+ partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+ partCount = partCountForBits(1 + 196 * partCount / 59);
+ decSignificand = new integerPart[partCount + 1];
+ partCount = 0;
+
+ /* Convert to binary efficiently - we do almost all multiplication
+ in an integerPart. When this would overflow we do a single
+ bignum multiplication, and then revert again to multiplication
+ in an integerPart. */
+ do {
+ integerPart decValue, val, multiplier;
+
+ val = 0;
+ multiplier = 1;
+
+ do {
+ if (*p == '.') {
+ p++;
+ if (p == str.end()) {
+ break;
+ }
+ }
+ decValue = decDigitValue(*p++);
+ assert(decValue < 10U && "Invalid character in significand");
+ multiplier *= 10;
+ val = val * 10 + decValue;
+ /* The maximum number that can be multiplied by ten with any
+ digit added without overflowing an integerPart. */
+ } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+ /* Multiply out the current part. */
+ APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+ partCount, partCount + 1, false);
+
+ /* If we used another part (likely but not guaranteed), increase
+ the count. */
+ if (decSignificand[partCount])
+ partCount++;
+ } while (p <= D.lastSigDigit);
+
+ category = fcNormal;
+ fs = roundSignificandWithExponent(decSignificand, partCount,
+ D.exponent, rounding_mode);
+
+ delete [] decSignificand;
+ }
+
+ return fs;
+}
+
+APFloat::opStatus
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
+{
+ assertArithmeticOK(*semantics);
+ assert(!str.empty() && "Invalid string length");
+
+ /* Handle a leading minus sign. */
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ sign = *p == '-' ? 1 : 0;
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String has no digits");
+ }
+
+ if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ assert(slen - 2 && "Invalid string");
+ return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+ rounding_mode);
+ }
+
+ return convertFromDecimalString(StringRef(p, slen), rounding_mode);
+}
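+
+/* Illustrative note (not part of the original source): both
+ APFloat(APFloat::IEEEdouble, "125.0") and the hexadecimal form
+ APFloat(APFloat::IEEEdouble, "0x1.f4p6") reach this routine through the
+ StringRef constructor above and denote the same value; a leading "0x"
+ or "0X" selects the hexadecimal parser, anything else the decimal one. */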
+
+/* Write out a hexadecimal representation of the floating point value
+ to DST, which must be of sufficient size, in the C99 form
+ [-]0xh.hhhhp[+-]d. Return the number of characters written,
+ excluding the terminating NUL.
+
+ If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+ HEXDIGITS digits appear altogether, rounding the value if
+ necessary. If HEXDIGITS is 0, the minimal precision to display the
+ number precisely is used instead. If nothing would appear after
+ the decimal point it is suppressed.
+
+ The decimal exponent is always printed and has at least one digit.
+ Zero values display an exponent of zero. Infinities and NaNs
+ appear as "infinity" or "nan" respectively.
+
+ The above rules are as specified by C99. There is ambiguity about
+ what the leading hexadecimal digit should be. This implementation
+ uses whatever is necessary so that the exponent is displayed as
+ stored. This implies the exponent will fall within the IEEE format
+ range, and the leading hexadecimal digit will be 0 (for denormals),
+ 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+ any other digits zero).
+*/
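+
+/* Illustrative usage (not part of the original source; the variable and
+ buffer size are hypothetical):
+
+ char buf[64];
+ unsigned chars = value.convertToHexString(buf, 0, false,
+ APFloat::rmNearestTiesToEven);
+
+ asks for the minimal number of digits in lower case; the rounding mode
+ only matters when a nonzero HEXDIGITS forces digits to be dropped. */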
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase, roundingMode rounding_mode) const
+{
+ char *p;
+
+ assertArithmeticOK(*semantics);
+
+ p = dst;
+ if (sign)
+ *dst++ = '-';
+
+ switch (category) {
+ case fcInfinity:
+ memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
+ dst += sizeof infinityL - 1;
+ break;
+
+ case fcNaN:
+ memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+ dst += sizeof NaNU - 1;
+ break;
+
+ case fcZero:
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+ *dst++ = '0';
+ if (hexDigits > 1) {
+ *dst++ = '.';
+ memset (dst, '0', hexDigits - 1);
+ dst += hexDigits - 1;
+ }
+ *dst++ = upperCase ? 'P': 'p';
+ *dst++ = '0';
+ break;
+
+ case fcNormal:
+ dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+ break;
+ }
+
+ *dst = 0;
+
+ return static_cast<unsigned int>(dst - p);
+}
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+ form of a normal floating point number with the specified number of
+ hexadecimal digits. If HEXDIGITS is zero the minimum number of
+ digits necessary to print the value precisely is output. */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const
+{
+ unsigned int count, valueBits, shift, partsCount, outputDigits;
+ const char *hexDigitChars;
+ const integerPart *significand;
+ char *p;
+ bool roundUp;
+
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+
+ roundUp = false;
+ hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+ significand = significandParts();
+ partsCount = partCount();
+
+ /* +3 because the first digit only uses the single integer bit, so
+ we have 3 virtual zero most-significant-bits. */
+ valueBits = semantics->precision + 3;
+ shift = integerPartWidth - valueBits % integerPartWidth;
+
+ /* The natural number of digits required ignoring trailing
+ insignificant zeroes. */
+ outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+ /* hexDigits of zero means use the required number for the
+ precision. Otherwise, see if we are truncating. If we are,
+ find out if we need to round away from zero. */
+ if (hexDigits) {
+ if (hexDigits < outputDigits) {
+ /* We are dropping non-zero bits, so need to check how to round.
+ "bits" is the number of dropped bits. */
+ unsigned int bits;
+ lostFraction fraction;
+
+ bits = valueBits - hexDigits * 4;
+ fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+ roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+ }
+ outputDigits = hexDigits;
+ }
+
+ /* Write the digits consecutively, and start writing in the location
+ of the hexadecimal point. We move the most significant digit
+ left and add the hexadecimal point later. */
+ p = ++dst;
+
+ count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+ while (outputDigits && count) {
+ integerPart part;
+
+ /* Put the most significant integerPartWidth bits in "part". */
+ if (--count == partsCount)
+ part = 0; /* An imaginary higher zero part. */
+ else
+ part = significand[count] << shift;
+
+ if (count && shift)
+ part |= significand[count - 1] >> (integerPartWidth - shift);
+
+ /* Convert as much of "part" to hexdigits as we can. */
+ unsigned int curDigits = integerPartWidth / 4;
+
+ if (curDigits > outputDigits)
+ curDigits = outputDigits;
+ dst += partAsHex (dst, part, curDigits, hexDigitChars);
+ outputDigits -= curDigits;
+ }
+
+ if (roundUp) {
+ char *q = dst;
+
+ /* Note that hexDigitChars has a trailing '0'. */
+ do {
+ q--;
+ *q = hexDigitChars[hexDigitValue (*q) + 1];
+ } while (*q == '0');
+ assert(q >= p);
+ } else {
+ /* Add trailing zeroes. */
+ memset (dst, '0', outputDigits);
+ dst += outputDigits;
+ }
+
+ /* Move the most significant digit to before the point, and if there
+ is something after the decimal point add it. This must come
+ after rounding above. */
+ p[-1] = p[0];
+ if (dst -1 == p)
+ dst--;
+ else
+ p[0] = '.';
+
+ /* Finally output the exponent. */
+ *dst++ = upperCase ? 'P': 'p';
+
+ return writeSignedDecimal (dst, exponent);
+}
+
+// For good hashing performance it is desirable that different APFloats
+// map to different integers.
+uint32_t
+APFloat::getHashValue() const
+{
+ if (category==fcZero) return sign<<8 | semantics->precision ;
+ else if (category==fcInfinity) return sign<<9 | semantics->precision;
+ else if (category==fcNaN) return 1<<10 | semantics->precision;
+ else {
+ uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
+ const integerPart* p = significandParts();
+ for (int i=partCount(); i>0; i--, p++)
+ hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
+ return hash;
+ }
+}
+
+// Conversion from APFloat to/from host float/double. It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while. This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE representations. We compensate for that here.
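+// For example, the smallest IEEE double denormal, 2^-1074, is held here with
+// exponent == -1022 (minExponent) and the integer bit clear; converting it
+// computes a biased exponent of 1, sees the clear integer bit, and rewrites
+// the IEEE exponent field to 0.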
+
+APInt
+APFloat::convertF80LongDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = 0x8000000000000000ULL;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 15) |
+ (myexponent & 0x7fffLL);
+ return APInt(80, 2, words);
+}
+
+APInt
+APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent + 1023; //bias
+ myexponent2 = exponent2 + 1023;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
+ myexponent2 = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ myexponent2 = 0;
+ mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ myexponent2 = 0;
+ mysignificand = 0;
+ mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7ff;
+ mysignificand = significandParts()[0];
+ myexponent2 = exponent2;
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL);
+ words[1] = ((uint64_t)(sign2 & 1) << 63) |
+ ((myexponent2 & 0x7ff) << 52) |
+ (mysignificand2 & 0xfffffffffffffLL);
+ return APInt(128, 2, words);
+}
+
+APInt
+APFloat::convertQuadrupleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7fff) << 48) |
+ (mysignificand2 & 0xffffffffffffLL);
+
+ return APInt(128, 2, words);
+}
+
+APInt
+APFloat::convertDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ assert(partCount()==1);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+1023; //bias
+ mysignificand = *significandParts();
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7ff;
+ mysignificand = *significandParts();
+ }
+
+ return APInt(64, ((((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL))));
+}
+
+APInt
+APFloat::convertFloatAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+127; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x800000))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0xff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0xff;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
+ (mysignificand & 0x7fffff)));
+}
+
+APInt
+APFloat::convertHalfAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
+ assert(partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+15; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x400))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x1f;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x1f;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
+ (mysignificand & 0x3ff)));
+}
+
+// This function creates an APInt that is just a bit map of the floating
+// point constant as it would appear in memory. It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+ if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
+ return convertHalfAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ return convertFloatAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ return convertDoubleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ return convertQuadrupleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ return convertPPCDoubleDoubleAPFloatToAPInt();
+
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ "unknown format!");
+ return convertF80LongDoubleAPFloatToAPInt();
+}
+
+float
+APFloat::convertToFloat() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+ "Float semantics are not IEEEsingle");
+ APInt api = bitcastToAPInt();
+ return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+ "Float semantics are not IEEEdouble");
+ APInt api = bitcastToAPInt();
+ return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format. Intel hardware (387 and later)
+/// does not support these bit patterns:
+/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+/// exponent = 0, integer bit 1 ("pseudodenormal")
+/// exponent neither 0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==80);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 & 0x7fff);
+ uint64_t mysignificand = i1;
+
+ initialize(&APFloat::x87DoubleExtended);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>15);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ }
+}
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i1 >> 52) & 0x7ff;
+ uint64_t mysignificand = i1 & 0xfffffffffffffLL;
+ uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
+ uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
+
+ initialize(&APFloat::PPCDoubleDouble);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i1>>63);
+ sign2 = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless. So is the whole second word, but keep it
+ // for determinism.
+ category = fcNaN;
+ exponent2 = myexponent2;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ // Note there is no category2; the second word is treated as if it is
+ // fcNormal, although it might be something else considered by itself.
+ exponent = myexponent - 1023;
+ exponent2 = myexponent2 - 1023;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ significandParts()[0] |= 0x10000000000000LL; // integer bit
+ if (myexponent2==0)
+ exponent2 = -1022;
+ else
+ significandParts()[1] |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromQuadrupleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 >> 48) & 0x7fff;
+ uint64_t mysignificand = i1;
+ uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+
+ initialize(&APFloat::IEEEquad);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff &&
+ (mysignificand!=0 || mysignificand2 !=0)) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ else
+ significandParts()[1] |= 0x1000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==64);
+ uint64_t i = *api.getRawData();
+ uint64_t myexponent = (i >> 52) & 0x7ff;
+ uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+ initialize(&APFloat::IEEEdouble);
+ assert(partCount()==1);
+
+ sign = static_cast<unsigned int>(i>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 1023;
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ *significandParts() |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==32);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 23) & 0xff;
+ uint32_t mysignificand = i & 0x7fffff;
+
+ initialize(&APFloat::IEEEsingle);
+ assert(partCount()==1);
+
+ sign = i >> 31;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0xff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0xff && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 127; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -126;
+ else
+ *significandParts() |= 0x800000; // integer bit
+ }
+}
+
+void
+APFloat::initFromHalfAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==16);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 10) & 0x1f;
+ uint32_t mysignificand = i & 0x3ff;
+
+ initialize(&APFloat::IEEEhalf);
+ assert(partCount()==1);
+
+ sign = i >> 15;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x1f && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x1f && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 15; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -14;
+ else
+ *significandParts() |= 0x400; // integer bit
+ }
+}
+
+/// Treat api as containing the bits of a floating point number. Currently
+/// we infer the floating point type from the size of the APInt. The
+/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
+/// when the size is anything else).
+void
+APFloat::initFromAPInt(const APInt& api, bool isIEEE)
+{
+ if (api.getBitWidth() == 16)
+ return initFromHalfAPInt(api);
+ else if (api.getBitWidth() == 32)
+ return initFromFloatAPInt(api);
+ else if (api.getBitWidth()==64)
+ return initFromDoubleAPInt(api);
+ else if (api.getBitWidth()==80)
+ return initFromF80LongDoubleAPInt(api);
+ else if (api.getBitWidth()==128)
+ return (isIEEE ?
+ initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
+ else
+ llvm_unreachable(0);
+}
+
+APFloat
+APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
+{
+ return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
+}
+
+APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 1..10
+ // significand = 1..1
+
+ Val.exponent = Sem.maxExponent; // unbiased
+
+ // 1-initialize all bits....
+ Val.zeroSignificand();
+ integerPart *significand = Val.significandParts();
+ unsigned N = partCountForBits(Sem.precision);
+ for (unsigned i = 0; i != N; ++i)
+ significand[i] = ~((integerPart) 0);
+
+ // ...and then clear any bits above the precision in the top word, keeping
+ // all Sem.precision significand bits (including the integer bit) set.
+ if (Sem.precision % integerPartWidth != 0)
+ significand[N-1] &=
+ (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
+
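+ // For IEEEsingle, for example, this should produce the largest finite
+ // single-precision value, roughly 3.40282347e+38 (bit pattern 0x7f7fffff).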
+ return Val;
+}
+
+APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 0..01
+
+ Val.exponent = Sem.minExponent; // unbiased
+ Val.zeroSignificand();
+ Val.significandParts()[0] = 1;
+ return Val;
+}
+
+APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 10..0
+
+ Val.exponent = Sem.minExponent;
+ Val.zeroSignificand();
+ Val.significandParts()[partCountForBits(Sem.precision)-1] |=
+ (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
+
+ return Val;
+}
+
+APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
+ initFromAPInt(api, isIEEE);
+}
+
+APFloat::APFloat(float f) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::floatToBits(f));
+}
+
+APFloat::APFloat(double d) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::doubleToBits(d));
+}
+
+namespace {
+ static void append(SmallVectorImpl<char> &Buffer,
+ unsigned N, const char *Str) {
+ unsigned Start = Buffer.size();
+ Buffer.set_size(Start + N);
+ memcpy(&Buffer[Start], Str, N);
+ }
+
+ template <unsigned N>
+ void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
+ append(Buffer, N, Str);
+ }
+
+ /// Removes data from the given significand until it is no more
+ /// precise than is required for the desired precision.
+ void AdjustToPrecision(APInt &significand,
+ int &exp, unsigned FormatPrecision) {
+ unsigned bits = significand.getActiveBits();
+
+ // 196/59 is a very slight overestimate of lg_2(10).
+ unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
+
+ if (bits <= bitsRequired) return;
+
+ unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
+ if (!tensRemovable) return;
+
+ exp += tensRemovable;
+
+ APInt divisor(significand.getBitWidth(), 1);
+ APInt powten(significand.getBitWidth(), 10);
+ while (true) {
+ if (tensRemovable & 1)
+ divisor *= powten;
+ tensRemovable >>= 1;
+ if (!tensRemovable) break;
+ powten *= powten;
+ }
+
+ significand = significand.udiv(divisor);
+
+ // Truncate the significand down to its active bit count, but
+ // don't try to drop below 32.
+ unsigned newPrecision = std::max(32U, significand.getActiveBits());
+ significand = significand.trunc(newPrecision);
+ }
+
+
+ void AdjustToPrecision(SmallVectorImpl<char> &buffer,
+ int &exp, unsigned FormatPrecision) {
+ unsigned N = buffer.size();
+ if (N <= FormatPrecision) return;
+
+ // The most significant figures are the last ones in the buffer.
+ unsigned FirstSignificant = N - FormatPrecision;
+
+ // Round.
+ // FIXME: this probably shouldn't use 'round half up'.
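+ // For example, with FormatPrecision == 2 and a buffer of {'6','9','9'}
+ // (the value 996, least significant digit first), the dropped '6' rounds
+ // up, the carry runs through both '9's, and we are left with the single
+ // digit '1' and exp increased by 3: 996 becomes 1.0e3 at two figures.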
+
+ // Rounding down is just a truncation, except we also want to drop
+ // trailing zeros from the new result.
+ if (buffer[FirstSignificant - 1] < '5') {
+ while (buffer[FirstSignificant] == '0')
+ FirstSignificant++;
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ return;
+ }
+
+ // Rounding up requires a decimal add-with-carry. If we continue
+ // the carry, the newly-introduced zeros will just be truncated.
+ for (unsigned I = FirstSignificant; I != N; ++I) {
+ if (buffer[I] == '9') {
+ FirstSignificant++;
+ } else {
+ buffer[I]++;
+ break;
+ }
+ }
+
+ // If we carried through, we have exactly one digit of precision.
+ if (FirstSignificant == N) {
+ exp += FirstSignificant;
+ buffer.clear();
+ buffer.push_back('1');
+ return;
+ }
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ }
+}
+
+void APFloat::toString(SmallVectorImpl<char> &Str,
+ unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
+ switch (category) {
+ case fcInfinity:
+ if (isNegative())
+ return append(Str, "-Inf");
+ else
+ return append(Str, "+Inf");
+
+ case fcNaN: return append(Str, "NaN");
+
+ case fcZero:
+ if (isNegative())
+ Str.push_back('-');
+
+ if (!FormatMaxPadding)
+ append(Str, "0.0E+0");
+ else
+ Str.push_back('0');
+ return;
+
+ case fcNormal:
+ break;
+ }
+
+ if (isNegative())
+ Str.push_back('-');
+
+ // Decompose the number into an APInt and an exponent.
+ int exp = exponent - ((int) semantics->precision - 1);
+ APInt significand(semantics->precision,
+ partCountForBits(semantics->precision),
+ significandParts());
+
+ // Set FormatPrecision if zero. We want to do this before we
+ // truncate trailing zeros, as those are part of the precision.
+ if (!FormatPrecision) {
+ // It's an interesting question whether to use the nominal
+ // precision or the active precision here for denormals.
+
+ // FormatPrecision = ceil(significandBits / lg_2(10))
+ FormatPrecision = (semantics->precision * 59 + 195) / 196;
+ }
+
+ // Ignore trailing binary zeros.
+ int trailingZeros = significand.countTrailingZeros();
+ exp += trailingZeros;
+ significand = significand.lshr(trailingZeros);
+
+ // Change the exponent from 2^e to 10^e.
+ if (exp == 0) {
+ // Nothing to do.
+ } else if (exp > 0) {
+ // Just shift left.
+ significand = significand.zext(semantics->precision + exp);
+ significand <<= exp;
+ exp = 0;
+ } else { /* exp < 0 */
+ int texp = -exp;
+
+ // We transform this using the identity:
+ // (N)(2^-e) == (N)(5^e)(10^-e)
+ // This means we have to multiply N (the significand) by 5^e.
+ // To avoid overflow, we have to operate on numbers large
+ // enough to store N * 5^e:
+ // log2(N * 5^e) == log2(N) + e * log2(5)
+ // <= semantics->precision + e * 137 / 59
+ // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
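+ // For example, N * 2^-3 == N * 5^3 * 10^-3 == 125N * 10^-3, so for N == 1
+ // this is 0.125 == 125e-3.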
+
+ unsigned precision = semantics->precision + 137 * texp / 59;
+
+ // Multiply significand by 5^e.
+ // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
+ significand = significand.zext(precision);
+ APInt five_to_the_i(precision, 5);
+ while (true) {
+ if (texp & 1) significand *= five_to_the_i;
+
+ texp >>= 1;
+ if (!texp) break;
+ five_to_the_i *= five_to_the_i;
+ }
+ }
+
+ AdjustToPrecision(significand, exp, FormatPrecision);
+
+ llvm::SmallVector<char, 256> buffer;
+
+ // Fill the buffer.
+ unsigned precision = significand.getBitWidth();
+ APInt ten(precision, 10);
+ APInt digit(precision, 0);
+
+ bool inTrail = true;
+ while (significand != 0) {
+ // digit <- significand % 10
+ // significand <- significand / 10
+ APInt::udivrem(significand, ten, significand, digit);
+
+ unsigned d = digit.getZExtValue();
+
+ // Drop trailing zeros.
+ if (inTrail && !d) exp++;
+ else {
+ buffer.push_back((char) ('0' + d));
+ inTrail = false;
+ }
+ }
+
+ assert(!buffer.empty() && "no characters in buffer!");
+
+ // Drop down to FormatPrecision.
+ // TODO: don't do more precise calculations above than are required.
+ AdjustToPrecision(buffer, exp, FormatPrecision);
+
+ unsigned NDigits = buffer.size();
+
+ // Check whether we should use scientific notation.
+ bool FormatScientific;
+ if (!FormatMaxPadding)
+ FormatScientific = true;
+ else {
+ if (exp >= 0) {
+ // 765e3 --> 765000
+ // ^^^
+ // But we shouldn't make the number look more precise than it is.
+ FormatScientific = ((unsigned) exp > FormatMaxPadding ||
+ NDigits + (unsigned) exp > FormatPrecision);
+ } else {
+ // Power of the most significant digit.
+ int MSD = exp + (int) (NDigits - 1);
+ if (MSD >= 0) {
+ // 765e-2 == 7.65
+ FormatScientific = false;
+ } else {
+ // 765e-5 == 0.00765
+ // ^ ^^
+ FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
+ }
+ }
+ }
+
+ // Scientific formatting is pretty straightforward.
+ if (FormatScientific) {
+ exp += (NDigits - 1);
+
+ Str.push_back(buffer[NDigits-1]);
+ Str.push_back('.');
+ if (NDigits == 1)
+ Str.push_back('0');
+ else
+ for (unsigned I = 1; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ Str.push_back('E');
+
+ Str.push_back(exp >= 0 ? '+' : '-');
+ if (exp < 0) exp = -exp;
+ SmallVector<char, 6> expbuf;
+ do {
+ expbuf.push_back((char) ('0' + (exp % 10)));
+ exp /= 10;
+ } while (exp);
+ for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
+ Str.push_back(expbuf[E-1-I]);
+ return;
+ }
+
+ // Non-scientific, positive exponents.
+ if (exp >= 0) {
+ for (unsigned I = 0; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ for (unsigned I = 0; I != (unsigned) exp; ++I)
+ Str.push_back('0');
+ return;
+ }
+
+ // Non-scientific, negative exponents.
+
+ // The number of digits to the left of the decimal point.
+ int NWholeDigits = exp + (int) NDigits;
+
+ unsigned I = 0;
+ if (NWholeDigits > 0) {
+ for (; I != (unsigned) NWholeDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+ Str.push_back('.');
+ } else {
+ unsigned NZeros = 1 + (unsigned) -NWholeDigits;
+
+ Str.push_back('0');
+ Str.push_back('.');
+ for (unsigned Z = 1; Z != NZeros; ++Z)
+ Str.push_back('0');
+ }
+
+ for (; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+}
+
+bool APFloat::getExactInverse(APFloat *inv) const {
+ // We can only guarantee the existence of an exact inverse for IEEE floats.
+ if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
+ semantics != &IEEEdouble && semantics != &IEEEquad)
+ return false;
+
+ // Special floats and denormals have no exact inverse.
+ if (category != fcNormal)
+ return false;
+
+ // Check that the number is a power of two by making sure that only the
+ // integer bit is set in the significand.
+ if (significandLSB() != semantics->precision - 1)
+ return false;
+
+ // Get the inverse.
+ APFloat reciprocal(*semantics, 1ULL);
+ if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
+ return false;
+
+ // Avoid multiplication with a denormal, it is not safe on all platforms and
+ // may be slower than a normal division.
+ if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
+ return false;
+
+ assert(reciprocal.category == fcNormal &&
+ reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
+
+ if (inv)
+ *inv = reciprocal;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
new file mode 100644
index 000000000000..76265d445f45
--- /dev/null
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -0,0 +1,2947 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <limits>
+#include <cstring>
+#include <cstdlib>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ memset(result, 0, numWords * sizeof(uint64_t));
+ return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ return result;
+}
+
+/// A utility function that converts a character to a digit.
+inline static unsigned getDigit(char cdigit, uint8_t radix) {
+ unsigned r;
+
+ if (radix == 16) {
+ r = cdigit - '0';
+ if (r <= 9)
+ return r;
+
+ r = cdigit - 'A';
+ if (r <= 5)
+ return r + 10;
+
+ r = cdigit - 'a';
+ if (r <= 5)
+ return r + 10;
+ }
+
+ r = cdigit - '0';
+ if (r < radix)
+ return r;
+
+ return -1U;
+}
+
+
+void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
+ pVal = getClearedMemory(getNumWords());
+ pVal[0] = val;
+ if (isSigned && int64_t(val) < 0)
+ for (unsigned i = 1; i < getNumWords(); ++i)
+ pVal[i] = -1ULL;
+}
+
+void APInt::initSlowCase(const APInt& that) {
+ pVal = getMemory(getNumWords());
+ memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits), VAL(0) {
+ assert(BitWidth && "Bitwidth too small");
+ assert(bigVal && "Null pointer detected!");
+ if (isSingleWord())
+ VAL = bigVal[0];
+ else {
+ // Get memory, cleared to 0
+ pVal = getClearedMemory(getNumWords());
+ // Calculate the number of words to copy
+ unsigned words = std::min<unsigned>(numWords, getNumWords());
+ // Copy the words from bigVal to pVal
+ memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
+ }
+ // Make sure unused high bits are cleared
+ clearUnusedBits();
+}
+
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
+ : BitWidth(numbits), VAL(0) {
+ assert(BitWidth && "Bitwidth too small");
+ fromString(numbits, Str, radix);
+}
+
+APInt& APInt::AssignSlowCase(const APInt& RHS) {
+ // Don't do anything for X = X
+ if (this == &RHS)
+ return *this;
+
+ if (BitWidth == RHS.getBitWidth()) {
+ // assume same bit-width single-word case is already handled
+ assert(!isSingleWord());
+ memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
+ return *this;
+ }
+
+ if (isSingleWord()) {
+ // assume case where both are single words is already handled
+ assert(!RHS.isSingleWord());
+ VAL = 0;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ } else if (getNumWords() == RHS.getNumWords())
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ else if (RHS.isSingleWord()) {
+ delete [] pVal;
+ VAL = RHS.VAL;
+ } else {
+ delete [] pVal;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ }
+ BitWidth = RHS.BitWidth;
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL = RHS;
+ else {
+ pVal[0] = RHS;
+ memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ }
+ return clearUnusedBits();
+}
+
+/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(pVal[i]);
+}
+
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// "digit" integer array, x[], storing the result in dest[] (which may alias
+/// x[]). 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ dest[i] = y + x[i];
+ if (dest[i] < y)
+ y = 1; // Carry one to next digit.
+ else {
+ y = 0; // No need to carry so exit early
+ break;
+ }
+ }
+ return y;
+}
+
+/// @brief Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++VAL;
+ else
+ add_1(pVal, pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "digits" in x. The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t X = x[i];
+ x[i] -= y;
+ if (y > X)
+ y = 1; // We have to "borrow 1" from next "digit"
+ else {
+ y = 0; // No need to borrow
+ break; // Remaining digits are unchanged so exit early
+ }
+ }
+ return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --VAL;
+ else
+ sub_1(pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool carry = false;
+ for (unsigned i = 0; i< len; ++i) {
+ uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+ dest[i] = x[i] + y[i] + carry;
+ carry = dest[i] < limit || (carry && dest[i] == limit);
+ }
+ return carry;
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL += RHS.VAL;
+ else {
+ add(pVal, pVal, RHS.pVal, getNumWords());
+ }
+ return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool borrow = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+ borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+ dest[i] = x_tmp - y[i];
+ }
+ return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL -= RHS.VAL;
+ else
+ sub(pVal, pVal, RHS.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+ uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+ uint64_t carry = 0;
+
+ // For each digit of x.
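+ // Each iteration computes carry + x[i] * y as a 128-bit quantity using the
+ // schoolbook identity x[i]*y == hx*hy*2^64 + (hx*ly + lx*hy)*2^32 + lx*ly;
+ // the low 64 bits go to dest[i] and the high 64 bits become the next carry.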
+ for (unsigned i = 0; i < len; ++i) {
+ // Split x into high and low words
+ uint64_t lx = x[i] & 0xffffffffULL;
+ uint64_t hx = x[i] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ dest[i] = carry + lx * ly;
+ // Determine if the add above introduces carry.
+ hasCarry = (dest[i] < carry) ? 1 : 0;
+ carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // (2^32 - 1) + 2^32 = 2^64.
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+ }
+ return carry;
+}
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplication of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+ unsigned ylen) {
+ dest[xlen] = mul_1(dest, x, xlen, y[0]);
+ for (unsigned i = 1; i < ylen; ++i) {
+ uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+ uint64_t carry = 0, lx = 0, hx = 0;
+ for (unsigned j = 0; j < xlen; ++j) {
+ lx = x[j] & 0xffffffffULL;
+ hx = x[j] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ uint64_t result = carry + lx * ly;
+ hasCarry = (result < carry) ? 1 : 0;
+ carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (result >> 32);
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ result = (carry << 32) | (result & 0xffffffffULL);
+ dest[i+j] += result;
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + (dest[i+j] < result ? 1 : 0) +
+ ((lx * hy) >> 32) + hx * hy;
+ }
+ dest[i+xlen] = carry;
+ }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL *= RHS.VAL;
+ clearUnusedBits();
+ return *this;
+ }
+
+ // Get some bit facts about LHS and check for zero
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+ if (!lhsWords)
+ // 0 * X ===> 0
+ return *this;
+
+ // Get some bit facts about RHS and check for zero
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+ if (!rhsWords) {
+ // X * 0 ===> 0
+ clearAllBits();
+ return *this;
+ }
+
+ // Allocate space for the result
+ unsigned destWords = rhsWords + lhsWords;
+ uint64_t *dest = getMemory(destWords);
+
+ // Perform the long multiply
+ mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+ // Copy result back into *this
+ clearAllBits();
+ unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
+ memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+
+ // delete dest array and return
+ delete[] dest;
+ return *this;
+}
+
+APInt& APInt::operator&=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL &= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] &= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator|=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL |= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] |= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator^=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL ^= RHS.VAL;
+ this->clearUnusedBits();
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] ^= RHS.pVal[i];
+ return clearUnusedBits();
+}
+
+APInt APInt::AndSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t* val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] & RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::OrSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] | RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::XorSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] ^ RHS.pVal[i];
+
+ // 0^0==0, but clear the unused high bits anyway in case they got set.
+ return APInt(val, getBitWidth()).clearUnusedBits();
+}
+
+bool APInt::operator !() const {
+ if (isSingleWord())
+ return !VAL;
+
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ if (pVal[i])
+ return false;
+ return true;
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL * RHS.VAL);
+ APInt Result(*this);
+ Result *= RHS;
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator+(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL + RHS.VAL);
+ APInt Result(BitWidth, 0);
+ add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator-(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL - RHS.VAL);
+ APInt Result(BitWidth, 0);
+ sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+bool APInt::operator[](unsigned bitPosition) const {
+ assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
+ return (maskBit(bitPosition) &
+ (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ // Get some facts about the number of bits used in the two operands.
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If the number of bits isn't the same, they aren't equal
+ if (n1 != n2)
+ return false;
+
+ // If the number of bits fits in a word, we only need to compare the low word.
+ if (n1 <= APINT_BITS_PER_WORD)
+ return pVal[0] == RHS.pVal[0];
+
+ // Otherwise, compare everything
+ for (int i = whichWord(n1 - 1); i >= 0; --i)
+ if (pVal[i] != RHS.pVal[i])
+ return false;
+ return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+ unsigned n = getActiveBits();
+ if (n <= APINT_BITS_PER_WORD)
+ return pVal[0] == Val;
+ else
+ return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return VAL < RHS.VAL;
+
+ // Get active bit length of both operands
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If magnitude of LHS is less than RHS, return true.
+ if (n1 < n2)
+ return true;
+
+ // If magnitude of RHS is greater than LHS, return false.
+ if (n2 < n1)
+ return false;
+
+ // If they both fit in a word, just compare the low order word
+ if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+ return pVal[0] < RHS.pVal[0];
+
+ // Otherwise, compare all words
+ unsigned topWord = whichWord(std::max(n1,n2)-1);
+ for (int i = topWord; i >= 0; --i) {
+ if (pVal[i] > RHS.pVal[i])
+ return false;
+ if (pVal[i] < RHS.pVal[i])
+ return true;
+ }
+ return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+ return lhsSext < rhsSext;
+ }
+
+ APInt lhs(*this);
+ APInt rhs(RHS);
+ bool lhsNeg = isNegative();
+ bool rhsNeg = rhs.isNegative();
+ if (lhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ lhs.flipAllBits();
+ lhs++;
+ }
+ if (rhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ rhs.flipAllBits();
+ rhs++;
+ }
+
+ // Now we have unsigned values to compare, so pick the comparison based on
+ // whether the original values were negative.
+ if (lhsNeg)
+ if (rhsNeg)
+ return lhs.ugt(rhs);
+ else
+ return true;
+ else if (rhsNeg)
+ return false;
+ else
+ return lhs.ult(rhs);
+}
+
+void APInt::setBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL |= maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+}
+
+/// Set the bit at the position given by "bitPosition" to 0.
+/// @brief Set a given bit to 0.
+void APInt::clearBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL &= ~maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ size_t slen = str.size();
+
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
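+ // For example, "-ff" in radix 16 has slen == 2 after the sign, so this
+ // reports 2*4 + 1 == 9 bits, enough to hold -255 in two's complement.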
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
+
+ // Convert to the actual binary value.
+ APInt tmp(sufficient, StringRef(p, slen), radix);
+
+ // Compute how many bits are required. If the log is infinite, assume we need
+// just one bit.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else {
+ return isNegative + log + 1;
+ }
+}
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+// When targeting x86, both GCC and LLVM seem to recognize this as a
+// rotate instruction.
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define mix(a,b,c) \
+ { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+ }
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define final(a,b,c) \
+ { \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+ }
+
+// hashword() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. k is a pointer to an array of uint32_t values; length is
+// the length of the key, in 32-bit chunks. This version only handles
+// keys that are a multiple of 32 bits in size.
+static inline uint32_t hashword(const uint64_t *k64, size_t length)
+{
+ const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+// hashword8() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. This computes a 32-bit hash from one 64-bit word. When
+// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
+// function into about 35 instructions when inlined.
+static inline uint32_t hashword8(const uint64_t k64)
+{
+ uint32_t a,b,c;
+ a = b = c = 0xdeadbeef + 4;
+ b += k64 >> 32;
+ a += k64 & 0xffffffff;
+ final(a,b,c);
+ return c;
+}
+#undef final
+#undef mix
+#undef rot
+
+uint64_t APInt::getHashValue() const {
+ uint64_t hash;
+ if (isSingleWord())
+ hash = hashword8(VAL);
+ else
+ hash = hashword(pVal, getNumWords()*2);
+ return hash;
+}
+
+/// HiBits - This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return APIntOps::lshr(*this, BitWidth - numBits);
+}
+
+/// LoBits - This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ BitWidth - numBits);
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ // Treat the most significant word differently because it might have
+ // meaningless bits set beyond the precision.
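+ // For a 70-bit APInt, for instance, only the low 6 bits of the top word
+ // are significant, so MSWMask below is 0x3f.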
+ unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+ integerPart MSWMask;
+ if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+ else {
+ MSWMask = ~integerPart(0);
+ BitsInMSW = APINT_BITS_PER_WORD;
+ }
+
+ unsigned i = getNumWords();
+ integerPart MSW = pVal[i-1] & MSWMask;
+ if (MSW)
+ return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+ unsigned Count = BitsInMSW;
+ for (--i; i > 0u; --i) {
+ if (pVal[i-1] == 0)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += CountLeadingZeros_64(pVal[i-1]);
+ break;
+ }
+ }
+ return Count;
+}
+
+static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
+ unsigned Count = 0;
+ if (skip)
+ V <<= skip;
+ while (V && (V & (1ULL << 63))) {
+ Count++;
+ V <<= 1;
+ }
+ return Count;
+}
+
+unsigned APInt::countLeadingOnes() const {
+ if (isSingleWord())
+ return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+
+ unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
+ unsigned shift;
+ if (!highWordBits) {
+ highWordBits = APINT_BITS_PER_WORD;
+ shift = 0;
+ } else {
+ shift = APINT_BITS_PER_WORD - highWordBits;
+ }
+ int i = getNumWords() - 1;
+ unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ if (Count == highWordBits) {
+ for (i--; i >= 0; --i) {
+ if (pVal[i] == -1ULL)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += countLeadingOnes_64(pVal[i], 0);
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countTrailingZeros() const {
+ if (isSingleWord())
+ return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == 0; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingZeros_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countTrailingOnesSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingOnes_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countPopulationSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ Count += CountPopulation_64(pVal[i]);
+ return Count;
+}
+
+APInt APInt::byteSwap() const {
+ assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
+ if (BitWidth == 16)
+ return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+ else if (BitWidth == 32)
+ return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+ else if (BitWidth == 48) {
+ unsigned Tmp1 = unsigned(VAL >> 16);
+ Tmp1 = ByteSwap_32(Tmp1);
+ uint16_t Tmp2 = uint16_t(VAL);
+ Tmp2 = ByteSwap_16(Tmp2);
+ return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
+ } else if (BitWidth == 64)
+ return APInt(BitWidth, ByteSwap_64(VAL));
+ else {
+ APInt Result(BitWidth, 0);
+ char *pByte = (char*)Result.pVal;
+ for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
+ char Tmp = pByte[i];
+ pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
+ pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
+ }
+ return Result;
+ }
+}
+
+APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
+ const APInt& API2) {
+ APInt A = API1, B = API2;
+ while (!!B) {
+ APInt T = B;
+ B = APIntOps::urem(A, B);
+ A = T;
+ }
+ return A;
+}
+
+APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.D = Double;
+
+ // Get the sign bit from the highest order bit
+ bool isNeg = T.I >> 63;
+
+ // Get the 11-bit exponent and adjust for the 1023 bit bias
+ int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+
+ // If the exponent is negative, the value is < 0 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+
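+ // For example, Double == 3.0 has an exponent field of 0x400, so exp == 1
+ // and mantissa == 0x18000000000000; since exp < 52 the return below gives
+ // mantissa >> 51 == 3.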
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+ // If the client didn't provide enough bits for us to shift the mantissa
+ // into, the result is undefined; just return 0.
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp = Tmp.shl((unsigned)exp - 52);
+ return isNeg ? -Tmp : Tmp;
+}
+
+/// RoundToDouble - This function converts this APInt to a double.
+/// The layout for double is as follows (IEEE Standard 754):
+///   ----------------------------------------
+///  |  Sign    Exponent    Fraction    Bias  |
+///  |----------------------------------------|
+///  |  1[63]   11[62-52]   52[51-00]   1023  |
+///   ----------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+ // Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+ if (isSigned) {
+ int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
+ return double(sext);
+ } else
+ return double(getWord(0));
+ }
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.I = sign | (exp << 52) | mantissa;
+ return T.D;
+}
+
+// Truncate to new width.
+APInt APInt::trunc(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+ Result.pVal[i] = pVal[i];
+
+ // Truncate and copy any partial word.
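+ // Note that (0 - width) relies on unsigned wrap-around: the result is the
+ // number of unused bits in the top word (e.g. width == 70 gives 58).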
+ unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = pVal[i] << bits >> bits;
+
+ return Result;
+}
+
+// Sign extend to a new width.
+APInt APInt::sext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt SignExtend request");
+
+ if (width <= APINT_BITS_PER_WORD) {
+ uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+ val = (int64_t)val >> (width - BitWidth);
+ return APInt(width, val >> (APINT_BITS_PER_WORD - width));
+ }
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ uint64_t word = 0;
+ for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
+ word = getRawData()[i];
+ Result.pVal[i] = word;
+ }
+
+ // Read and sign-extend any partial word.
+ unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ word = (int64_t)getRawData()[i] << bits >> bits;
+ else
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+
+ // Write remaining full words.
+ for (; i != width / APINT_BITS_PER_WORD; i++) {
+ Result.pVal[i] = word;
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+ }
+
+ // Write any partial word.
+ bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = word << bits >> bits;
+
+ return Result;
+}
+
+// Zero extend to a new width.
+APInt APInt::zext(unsigned width) const {
+ assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, VAL);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy words.
+ unsigned i;
+ for (i = 0; i != getNumWords(); i++)
+ Result.pVal[i] = getRawData()[i];
+
+ // Zero remaining words.
+ memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+
+ return Result;
+}
+
+APInt APInt::zextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt APInt::sextOrTrunc(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
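+// For example, the extension routines differ only in how the new high bits are
+// filled:
+//   APInt(8, 0x80).zext(16) == APInt(16, 0x0080)   // zero-filled
+//   APInt(8, 0x80).sext(16) == APInt(16, 0xFF80)   // sign-filled (still -128)
+// zextOrTrunc and sextOrTrunc simply pick between these and trunc based on the
+// requested width.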
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(const APInt &shiftAmt) const {
+ return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(unsigned shiftAmt) const {
+ assert(shiftAmt <= BitWidth && "Invalid shift amount");
+ // Handle a degenerate case
+ if (shiftAmt == 0)
+ return *this;
+
+ // Handle single word shifts with built-in ashr
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0); // undefined
+ else {
+ unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
+ return APInt(BitWidth,
+ (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
+ }
+ }
+
+ // If all the bits were shifted out, the result is, technically, undefined.
+ // We return -1 if it was negative, 0 otherwise. We check this early to avoid
+ // issues in the algorithm below.
+ if (shiftAmt == BitWidth) {
+ if (isNegative())
+ return APInt(BitWidth, -1ULL, true);
+ else
+ return APInt(BitWidth, 0);
+ }
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // Compute some values needed by the following shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
+ unsigned breakWord = getNumWords() - 1 - offset; // last word affected
+ unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
+ if (bitsInWord == 0)
+ bitsInWord = APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ // Move the words containing significant bits
+ for (unsigned i = 0; i <= breakWord; ++i)
+ val[i] = pVal[i+offset]; // move whole word
+
+ // Adjust the top significant word for sign bit fill, if negative
+ if (isNegative())
+ if (bitsInWord < APINT_BITS_PER_WORD)
+ val[breakWord] |= ~0ULL << bitsInWord; // set high bits
+ } else {
+ // Shift the low order words
+ for (unsigned i = 0; i < breakWord; ++i) {
+ // This combines the shifted corresponding word with the low bits from
+ // the next word (shifted into this word's high bits).
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ }
+
+ // Shift the break word. In this case there are no bits from the next word
+ // to include in this word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Deal with sign extension in the break word, and possibly the word before
+ // it.
+ if (isNegative()) {
+ if (wordShift > bitsInWord) {
+ if (breakWord > 0)
+ val[breakWord-1] |=
+ ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
+ val[breakWord] |= ~0ULL;
+ } else
+ val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+ }
+ }
+
+ // Remaining words are 0 or -1, just assign them.
+ uint64_t fillValue = (isNegative() ? -1ULL : 0);
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = fillValue;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
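+// For example, an arithmetic shift replicates the sign bit:
+//   APInt(8, 0xF0).ashr(4) == APInt(8, 0xFF)   // -16 >> 4 == -1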
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(const APInt &shiftAmt) const {
+ return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(unsigned shiftAmt) const {
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+ else
+ return APInt(BitWidth, this->VAL >> shiftAmt);
+ }
+
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids
+ // issues with shifting by the size of the integer type, which produces
+ // undefined results in the code below. This is also an optimization.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, compute the shift with a simple carry
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (int i = getNumWords()-1; i >= 0; --i) {
+ val[i] = (pVal[i] >> shiftAmt) | carry;
+ carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < getNumWords() - offset; ++i)
+ val[i] = pVal[i+offset];
+ for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
+ val[i] = 0;
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Shift the low order words
+ unsigned breakWord = getNumWords() - offset -1;
+ for (unsigned i = 0; i < breakWord; ++i)
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ // Shift the break word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Remaining words are 0
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
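+// For example, a logical shift always fills the vacated bits with zeros:
+//   APInt(8, 0xF0).lshr(4) == APInt(8, 0x0F)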
+
+/// Left-shift this APInt by shiftAmt.
+/// @brief Left-shift function.
+APInt APInt::shl(const APInt &shiftAmt) const {
+ // It's undefined behavior in C to shift by BitWidth or greater.
+ return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::shlSlowCase(unsigned shiftAmt) const {
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids a
+ // lshr by the words size in the loop below which can produce incorrect
+ // results. It also avoids the expensive computation below for a common case.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, do it the easy way
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (unsigned i = 0; i < getNumWords(); i++) {
+ val[i] = pVal[i] << shiftAmt | carry;
+ carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < offset; i++)
+ val[i] = 0;
+ for (unsigned i = offset; i < getNumWords(); i++)
+ val[i] = pVal[i-offset];
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Copy whole words from this to Result.
+ unsigned i = getNumWords() - 1;
+ for (; i > offset; --i)
+ val[i] = pVal[i-offset] << wordShift |
+ pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
+ val[offset] = pVal[0] << wordShift;
+ for (i = 0; i < offset; ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+APInt APInt::rotl(const APInt &rotateAmt) const {
+ return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotl(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt hi = shl(rotateAmt);
+ APInt lo = lshr(BitWidth - rotateAmt);
+ return hi | lo;
+}
+
+APInt APInt::rotr(const APInt &rotateAmt) const {
+ return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotr(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt lo = lshr(rotateAmt);
+ APInt hi = shl(BitWidth - rotateAmt);
+ return hi | lo;
+}
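+// For example, rotates wrap the shifted-out bits around to the other end:
+//   APInt(8, 0x81).rotl(1) == APInt(8, 0x03)
+//   APInt(8, 0x81).rotr(1) == APInt(8, 0xC0)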
+
+// Square Root - this method computes and returns the square root of "this".
+// Three mechanisms are used for computation. For small values (<= 5 bits),
+// a table lookup is done. This gets some performance for common cases. For
+// values using less than 52 bits, the value is converted to double and then
+// the libc sqrt function is called. The result is rounded and then converted
+// back to a uint64_t which is then used to construct the result. Finally,
+// the Babylonian method for computing square roots is used.
+APInt APInt::sqrt() const {
+
+ // Determine the magnitude of the value.
+ unsigned magnitude = getActiveBits();
+
+ // Use a fast table for some small values. This also gets rid of some
+ // rounding errors in libc sqrt for small values.
+ if (magnitude <= 5) {
+ static const uint8_t results[32] = {
+ /* 0 */ 0,
+ /* 1- 2 */ 1, 1,
+ /* 3- 6 */ 2, 2, 2, 2,
+ /* 7-12 */ 3, 3, 3, 3, 3, 3,
+ /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ /* 31 */ 6
+ };
+ return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+ }
+
+ // If the magnitude of the value fits in less than 52 bits (the precision of
+ // an IEEE double precision floating point value), then we can use the
+ // libc sqrt function which will probably use a hardware sqrt computation.
+ // This should be faster than the algorithm below.
+ if (magnitude < 52) {
+#if HAVE_ROUND
+ return APInt(BitWidth,
+ uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+#else
+ return APInt(BitWidth,
+ uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
+#endif
+ }
+
+ // Okay, all the short cuts are exhausted. We must compute it. The following
+ // is a classical Babylonian method for computing the square root. This code
+ // was adapted to APInt from a Wikipedia article on such computations.
+ // See http://www.wikipedia.org/ and go to the page named
+ // Calculate_an_integer_square_root.
+ unsigned nbits = BitWidth, i = 4;
+ APInt testy(BitWidth, 16);
+ APInt x_old(BitWidth, 1);
+ APInt x_new(BitWidth, 0);
+ APInt two(BitWidth, 2);
+
+ // Select a good starting value using binary logarithms.
+ for (;; i += 2, testy = testy.shl(2))
+ if (i >= nbits || this->ule(testy)) {
+ x_old = x_old.shl(i / 2);
+ break;
+ }
+
+ // Use the Babylonian method to arrive at the integer square root:
+ for (;;) {
+ x_new = (this->udiv(x_old) + x_old).udiv(two);
+ if (x_old.ule(x_new))
+ break;
+ x_old = x_new;
+ }
+
+ // Make sure we return the closest approximation
+ // NOTE: The rounding calculation below is correct. It will produce an
+ // off-by-one discrepancy with results from pari/gp. That discrepancy has been
+ // determined to be a rounding issue with pari/gp as it begins to use a
+ // floating point representation after 192 bits. There are no discrepancies
+ // between this algorithm and pari/gp for bit widths < 192 bits.
+ APInt square(x_old * x_old);
+ APInt nextSquare((x_old + 1) * (x_old +1));
+ if (this->ult(square))
+ return x_old;
+ else if (this->ule(nextSquare)) {
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
+ else
+ return x_old + 1;
+ } else
+ llvm_unreachable("Error in APInt::sqrt computation");
+ return x_old + 1;
+}
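+// For example, APInt(32, 144).sqrt() == APInt(32, 12); for a non-square such
+// as 30 the result is the closest integer approximation described above,
+// i.e. APInt(32, 30).sqrt() == APInt(32, 5).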
+
+/// Computes the multiplicative inverse of this APInt for a given modulo. The
+/// iterative extended Euclidean algorithm is used to solve for this value,
+/// however we simplify it to speed up calculating only the inverse, and take
+/// advantage of div+rem calculations. We also use some tricks to avoid copying
+/// (potentially large) APInts around.
+APInt APInt::multiplicativeInverse(const APInt& modulo) const {
+ assert(ult(modulo) && "This APInt must be smaller than the modulo");
+
+ // Using the properties listed at the following web page (accessed 06/21/08):
+ // http://www.numbertheory.org/php/euclid.html
+ // (especially the properties numbered 3, 4 and 9) it can be proved that
+ // BitWidth bits suffice for all the computations in the algorithm implemented
+ // below. More precisely, this number of bits suffice if the multiplicative
+ // inverse exists, but may not suffice for the general extended Euclidean
+ // algorithm.
+
+ APInt r[2] = { modulo, *this };
+ APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
+ APInt q(BitWidth, 0);
+
+ unsigned i;
+ for (i = 0; r[i^1] != 0; i ^= 1) {
+ // An overview of the math without the confusing bit-flipping:
+ // q = r[i-2] / r[i-1]
+ // r[i] = r[i-2] % r[i-1]
+ // t[i] = t[i-2] - t[i-1] * q
+ udivrem(r[i], r[i^1], q, r[i]);
+ t[i] -= t[i^1] * q;
+ }
+
+ // If this APInt and the modulo are not coprime, there is no multiplicative
+ // inverse, so return 0. We check this by looking at the next-to-last
+ // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
+ // algorithm.
+ if (r[i] != 1)
+ return APInt(BitWidth, 0);
+
+ // The next-to-last t is the multiplicative inverse. However, we are
+ // interested in a positive inverse. Calculate a positive one from a negative
+ // one if necessary. A simple addition of the modulo suffices because
+ // abs(t[i]) is known to be less than *this/2 (see the link above).
+ return t[i].isNegative() ? t[i] + modulo : t[i];
+}
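+// For example, with *this == 3 and modulo == 7 the result is 5, because
+// 3 * 5 == 15 == 1 (mod 7); if gcd(*this, modulo) != 1 the result is 0.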
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., chapter 10.
+APInt::ms APInt::magic() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt ad, anc, delta, q1, r1, q2, r2, t;
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ struct ms mag;
+
+ ad = d.abs();
+ t = signedMin + (d.lshr(d.getBitWidth() - 1));
+ anc = t - 1 - t.urem(ad); // absolute value of nc
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1<<1; // update q1 = 2p/abs(nc)
+ r1 = r1<<1; // update r1 = rem(2p/abs(nc))
+ if (r1.uge(anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2<<1; // update q2 = 2p/abs(d)
+ r2 = r2<<1; // update r2 = rem(2p/abs(d))
+ if (r2.uge(ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1.ult(delta) || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d.isNegative()) mag.m = -mag.m; // resulting magic number
+ mag.s = p - d.getBitWidth(); // resulting shift
+ return mag;
+}
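+// For example, a 32-bit divisor of 10 yields mag.m == 0x66666667 and
+// mag.s == 2, the well-known constants from Hacker's Delight for signed
+// division by 10.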
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+APInt::mu APInt::magicu(unsigned LeadingZeros) const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+ nc = allOnes - (-d).urem(d);
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
+ r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
+ r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1.uge(nc - r1)) {
+ q1 = q1 + q1 + 1; // update q1
+ r1 = r1 + r1 - nc; // update r1
+ }
+ else {
+ q1 = q1+q1; // update q1
+ r1 = r1+r1; // update r1
+ }
+ if ((r2 + 1).uge(d - r2)) {
+ if (q2.uge(signedMax)) magu.a = 1;
+ q2 = q2+q2 + 1; // update q2
+ r2 = r2+r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2.uge(signedMin)) magu.a = 1;
+ q2 = q2+q2; // update q2
+ r2 = r2+r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < d.getBitWidth()*2 &&
+ (q1.ult(delta) || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - d.getBitWidth(); // resulting shift
+ return magu;
+}
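+// For example, a 32-bit unsigned divisor of 10 yields magu.m == 0xCCCCCCCD,
+// magu.s == 3 and magu.a == 0, i.e. x/10 == (x * 0xCCCCCCCD) >> (32 + 3) for
+// every 32-bit unsigned x (constants as tabulated in Hacker's Delight).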
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
+ unsigned m, unsigned n) {
+ assert(u && "Must provide dividend");
+ assert(v && "Must provide divisor");
+ assert(q && "Must provide quotient");
+ assert(u != v && u != q && v != q && "Must use different memory");
+ assert(n>1 && "n must be > 1");
+
+ // Knuth uses the value b as the base of the number system. In our case b
+ // is 2^32, the radix of the 32-bit digits used below.
+ uint64_t b = uint64_t(1) << 32;
+
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(dbgs() << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
+ // shift amount from the leading zeros. We are basically normalizing the u
+ // and v so that its high bits are shifted to the top of v's range without
+ // overflow. Note that this can require an extra word in u so that u must
+ // be of length m+n+1.
+ unsigned shift = CountLeadingZeros_32(v[n-1]);
+ unsigned v_carry = 0;
+ unsigned u_carry = 0;
+ if (shift) {
+ for (unsigned i = 0; i < m+n; ++i) {
+ unsigned u_tmp = u[i] >> (32 - shift);
+ u[i] = (u[i] << shift) | u_carry;
+ u_carry = u_tmp;
+ }
+ for (unsigned i = 0; i < n; ++i) {
+ unsigned v_tmp = v[i] >> (32 - shift);
+ v[i] = (v[i] << shift) | v_carry;
+ v_carry = v_tmp;
+ }
+ }
+ u[m+n] = u_carry;
+#if 0
+ DEBUG(dbgs() << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
+#endif
+
+ // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+ int j = m;
+ do {
+ DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
+ // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+ // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+ // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+ // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+ // on v[n-2] determines at high speed most of the cases in which the trial
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
+ uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
+ DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ uint64_t qp = dividend / v[n-1];
+ uint64_t rp = dividend % v[n-1];
+ if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+ qp--;
+ rp += v[n-1];
+ if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+ qp--;
+ }
+ DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+ // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+ // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+ // consists of a simple multiplication by a one-place number, combined with
+ // a subtraction.
+ bool isNeg = false;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
+ uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
+ bool borrow = subtrahend > u_tmp;
+ DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
+ << ", subtrahend == " << subtrahend
+ << ", borrow = " << borrow << '\n');
+
+ uint64_t result = u_tmp - subtrahend;
+ unsigned k = j + i;
+ u[k++] = (unsigned)(result & (b-1)); // subtract low word
+ u[k++] = (unsigned)(result >> 32); // subtract high word
+ while (borrow && k <= m+n) { // deal with borrow to the left
+ borrow = u[k] == 0;
+ u[k]--;
+ k++;
+ }
+ isNeg |= borrow;
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ u[j+i+1] << '\n');
+ }
+ DEBUG(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
+ //
+ if (isNeg) {
+ bool carry = true; // true because b's complement is "complement + 1"
+ for (unsigned i = 0; i <= m+n; ++i) {
+ u[i] = ~u[i] + carry; // b's complement
+ carry = carry && u[i] == 0;
+ }
+ }
+ DEBUG(dbgs() << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // negative, go to step D6; otherwise go on to step D7.
+ q[j] = (unsigned)qp;
+ if (isNeg) {
+ // D6. [Add back]. The probability that this step is necessary is very
+ // small, on the order of only 2/b. Make sure that test data accounts for
+ // this possibility. Decrease q[j] by 1
+ q[j]--;
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
+ // since it cancels with the borrow that occurred in D4.
+ bool carry = false;
+ for (unsigned i = 0; i < n; i++) {
+ unsigned limit = std::min(u[j+i],v[i]);
+ u[j+i] += v[i] + carry;
+ carry = u[j+i] < limit || (carry && u[j+i] == limit);
+ }
+ u[j+n] += carry;
+ }
+ DEBUG(dbgs() << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+
+ // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
+ } while (--j >= 0);
+
+ DEBUG(dbgs() << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
+ DEBUG(dbgs() << '\n');
+
+ // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+ // remainder may be obtained by dividing u[...] by d. If r is non-null we
+ // compute the remainder (urem uses this).
+ if (r) {
+ // The value d is expressed by the "shift" value above since we avoided
+ // multiplication by d by using a shift left. So, all we have to do is
+ // shift right here.
+ if (shift) {
+ unsigned carry = 0;
+ DEBUG(dbgs() << "KnuthDiv: remainder:");
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = (u[i] >> shift) | carry;
+ carry = u[i] << (32 - shift);
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ } else {
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = u[i];
+ DEBUG(dbgs() << " " << r[i]);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#if 0
+ DEBUG(dbgs() << '\n');
+#endif
+}
+
+void APInt::divide(const APInt LHS, unsigned lhsWords,
+ const APInt &RHS, unsigned rhsWords,
+ APInt *Quotient, APInt *Remainder)
+{
+ assert(lhsWords >= rhsWords && "Fractional result");
+
+ // First, compose the values into an array of 32-bit words instead of
+ // 64-bit words. This is a necessity of both the "short division" algorithm
+ // and the Knuth "classical algorithm" which requires there to be native
+ // operations for +, -, and * on an m bit value with an m*2 bit result. We
+ // can't use 64-bit operands here because we don't have native results of
+ // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+ // work on big-endian machines.
+ uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
+ unsigned n = rhsWords * 2;
+ unsigned m = (lhsWords * 2) - n;
+
+ // Allocate space for the temporary values we need either on the stack, if
+ // it will fit, or on the heap if it won't.
+ unsigned SPACE[128];
+ unsigned *U = 0;
+ unsigned *V = 0;
+ unsigned *Q = 0;
+ unsigned *R = 0;
+ if ((Remainder?4:3)*n+2*m+1 <= 128) {
+ U = &SPACE[0];
+ V = &SPACE[m+n+1];
+ Q = &SPACE[(m+n+1) + n];
+ if (Remainder)
+ R = &SPACE[(m+n+1) + n + (m+n)];
+ } else {
+ U = new unsigned[m + n + 1];
+ V = new unsigned[n];
+ Q = new unsigned[m+n];
+ if (Remainder)
+ R = new unsigned[n];
+ }
+
+ // Initialize the dividend
+ memset(U, 0, (m+n+1)*sizeof(unsigned));
+ for (unsigned i = 0; i < lhsWords; ++i) {
+ uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+ U[i * 2] = (unsigned)(tmp & mask);
+ U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+ U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+ // Initialize the divisor
+ memset(V, 0, (n)*sizeof(unsigned));
+ for (unsigned i = 0; i < rhsWords; ++i) {
+ uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+ V[i * 2] = (unsigned)(tmp & mask);
+ V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+
+ // initialize the quotient and remainder
+ memset(Q, 0, (m+n) * sizeof(unsigned));
+ if (Remainder)
+ memset(R, 0, n * sizeof(unsigned));
+
+ // Now, adjust m and n for the Knuth division. n is the number of words in
+ // the divisor. m is the number of words by which the dividend exceeds the
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+ n--;
+ m++;
+ }
+ for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+ m--;
+
+ // If we're left with only a single word for the divisor, Knuth doesn't work
+ // so we implement the short division algorithm here. This is much simpler
+ // and faster because we are certain that we can divide a 64-bit quantity
+ // by a 32-bit quantity at hardware speed and short division is simply a
+ // series of such operations. This is just like doing short division but we
+ // are using base 2^32 instead of base 10.
+ assert(n != 0 && "Divide by zero?");
+ if (n == 1) {
+ unsigned divisor = V[0];
+ unsigned remainder = 0;
+ for (int i = m+n-1; i >= 0; i--) {
+ uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
+ if (partial_dividend == 0) {
+ Q[i] = 0;
+ remainder = 0;
+ } else if (partial_dividend < divisor) {
+ Q[i] = 0;
+ remainder = (unsigned)partial_dividend;
+ } else if (partial_dividend == divisor) {
+ Q[i] = 1;
+ remainder = 0;
+ } else {
+ Q[i] = (unsigned)(partial_dividend / divisor);
+ remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
+ }
+ }
+ if (R)
+ R[0] = remainder;
+ } else {
+ // Now we're ready to invoke the Knuth classical divide algorithm. In this
+ // case n > 1.
+ KnuthDiv(U, V, Q, R, m, n);
+ }
+
+ // If the caller wants the quotient
+ if (Quotient) {
+ // Set up the Quotient value's memory.
+ if (Quotient->BitWidth != LHS.BitWidth) {
+ if (Quotient->isSingleWord())
+ Quotient->VAL = 0;
+ else
+ delete [] Quotient->pVal;
+ Quotient->BitWidth = LHS.BitWidth;
+ if (!Quotient->isSingleWord())
+ Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+ } else
+ Quotient->clearAllBits();
+
+ // The quotient is in Q. Reconstitute the quotient into Quotient's low
+ // order words.
+ if (lhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Quotient->isSingleWord())
+ Quotient->VAL = tmp;
+ else
+ Quotient->pVal[0] = tmp;
+ } else {
+ assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient->pVal[i] =
+ uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // If the caller wants the remainder
+ if (Remainder) {
+ // Set up the Remainder value's memory.
+ if (Remainder->BitWidth != RHS.BitWidth) {
+ if (Remainder->isSingleWord())
+ Remainder->VAL = 0;
+ else
+ delete [] Remainder->pVal;
+ Remainder->BitWidth = RHS.BitWidth;
+ if (!Remainder->isSingleWord())
+ Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+ } else
+ Remainder->clearAllBits();
+
+ // The remainder is in R. Reconstitute the remainder into Remainder's low
+ // order words.
+ if (rhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Remainder->isSingleWord())
+ Remainder->VAL = tmp;
+ else
+ Remainder->pVal[0] = tmp;
+ } else {
+ assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder->pVal[i] =
+ uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // Clean up the memory we allocated.
+ if (U != &SPACE[0]) {
+ delete [] U;
+ delete [] V;
+ delete [] Q;
+ delete [] R;
+ }
+}
+
+APInt APInt::udiv(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+
+ // First, deal with the easy case
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Divide by zero?");
+ return APInt(BitWidth, VAL / RHS.VAL);
+ }
+
+ // Get some facts about the LHS and RHS number of bits and words
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Divided by zero???");
+ unsigned lhsBits = this->getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ } else if (*this == RHS) {
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ } else if (lhsWords == 1 && rhsWords == 1) {
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(1,0); // to hold result.
+ divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
+ return Quotient;
+}
+
+APInt APInt::urem(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Remainder by zero?");
+ return APInt(BitWidth, VAL % RHS.VAL);
+ }
+
+ // Get some facts about the LHS
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+ // Get some facts about the RHS
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Performing remainder operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ // 0 % Y ===> 0
+ return APInt(BitWidth, 0);
+ } else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X % Y ===> X, iff X < Y
+ return *this;
+ } else if (*this == RHS) {
+ // X % X == 0;
+ return APInt(BitWidth, 0);
+ } else if (lhsWords == 1) {
+ // All high words are zero, just use native remainder
+ return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Remainder(1,0);
+ divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
+ return Remainder;
+}
+
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ // Get some size facts about the dividend and divisor
+ unsigned lhsBits = LHS.getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = 0; // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ Remainder = LHS; // X % Y ===> X, iff X < Y
+ Quotient = 0; // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = 1; // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ if (lhsWords == 1 && rhsWords == 1) {
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
+ uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+ Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
+ Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
+}
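+// For example, a single call computes both results at once:
+//   APInt Q(128, 0), R(128, 0);
+//   APInt::udivrem(APInt(128, 100), APInt(128, 7), Q, R);  // Q == 14, R == 2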
+
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = Res.ult(RHS);
+ return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this - RHS;
+ Overflow = isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this-RHS;
+ Overflow = Res.ugt(*this);
+ return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+ // MININT/-1 --> overflow.
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
+ if (Overflow)
+ ShAmt = getBitWidth()-1;
+
+ if (isNonNegative()) // Don't allow sign change.
+ Overflow = ShAmt >= countLeadingZeros();
+ else
+ Overflow = ShAmt >= countLeadingOnes();
+
+ return *this << ShAmt;
+}
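+// For example, the *_ov helpers return the wrapped result and report the
+// overflow separately:
+//   bool Ov;
+//   APInt(8, 127).sadd_ov(APInt(8, 1), Ov);  // yields 0x80 (-128), Ov == true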
+
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
+ // Check our assumptions here
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ StringRef::iterator p = str.begin();
+ size_t slen = str.size();
+ bool isNeg = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+ assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+ assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
+ assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+ "Insufficient bit width");
+
+ // Allocate memory
+ if (!isSingleWord())
+ pVal = getClearedMemory(getNumWords());
+
+ // Figure out if we can shift instead of multiply
+ unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+ // Set up an APInt for the digit to add outside the loop so we don't
+ // constantly construct/destruct it.
+ APInt apdigit(getBitWidth(), 0);
+ APInt apradix(getBitWidth(), radix);
+
+ // Enter digit traversal loop
+ for (StringRef::iterator e = str.end(); p != e; ++p) {
+ unsigned digit = getDigit(*p, radix);
+ assert(digit < radix && "Invalid character in digit string");
+
+ // Shift or multiply the value by the radix
+ if (slen > 1) {
+ if (shift)
+ *this <<= shift;
+ else
+ *this *= apradix;
+ }
+
+ // Add in the digit we just interpreted
+ if (apdigit.isSingleWord())
+ apdigit.VAL = digit;
+ else
+ apdigit.pVal[0] = digit;
+ *this += apdigit;
+ }
+ // If it's negative, put it in two's complement form
+ if (isNeg) {
+ (*this)--;
+ this->flipAllBits();
+ }
+}
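+// For example, parsing "-42" with radix 10 into a 16-bit APInt leaves it
+// holding 0xFFD6, the two's complement encoding of -42. This routine is
+// normally reached through the string-accepting APInt constructor rather than
+// called directly.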
+
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
+ bool Signed, bool formatAsCLiteral) const {
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 16:
+ Prefix = "0x";
+ break;
+ }
+ }
+
+ // First, check for a zero value and just short circuit the logic below.
+ if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+ Str.push_back('0');
+ return;
+ }
+
+ static const char Digits[] = "0123456789ABCDEF";
+
+ if (isSingleWord()) {
+ char Buffer[65];
+ char *BufPtr = Buffer+65;
+
+ uint64_t N;
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
+ int64_t I = getSExtValue();
+ if (I >= 0) {
+ N = I;
+ } else {
+ Str.push_back('-');
+ N = -(uint64_t)I;
+ }
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+
+ while (N) {
+ *--BufPtr = Digits[N % Radix];
+ N /= Radix;
+ }
+ Str.append(BufPtr, Buffer+65);
+ return;
+ }
+
+ APInt Tmp(*this);
+
+ if (Signed && isNegative()) {
+ // They want to print the signed version and it is a negative value
+ // Flip the bits and add one to turn it into the equivalent positive
+ // value and put a '-' in the result.
+ Tmp.flipAllBits();
+ Tmp++;
+ Str.push_back('-');
+ }
+
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ }
+
+ // We insert the digits backward, then reverse them to get the right order.
+ unsigned StartDig = Str.size();
+
+ // For radix 2, 8 and 16 we can just shift and mask instead of dividing,
+ // because each digit occupies a fixed number of bits (1, 3 and 4
+ // respectively). We just shift until the value is zero.
+ if (Radix != 10) {
+ // Just shift tmp right for each digit width until it becomes zero
+ unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
+ unsigned MaskAmt = Radix - 1;
+
+ while (Tmp != 0) {
+ unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+ Str.push_back(Digits[Digit]);
+ Tmp = Tmp.lshr(ShiftAmt);
+ }
+ } else {
+ APInt divisor(4, 10);
+ while (Tmp != 0) {
+ APInt APdigit(1, 0);
+ APInt tmp2(Tmp.getBitWidth(), 0);
+ divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
+ &APdigit);
+ unsigned Digit = (unsigned)APdigit.getZExtValue();
+ assert(Digit < Radix && "divide failed");
+ Str.push_back(Digits[Digit]);
+ Tmp = tmp2;
+ }
+ }
+
+ // Reverse the digits before returning.
+ std::reverse(Str.begin()+StartDig, Str.end());
+}
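+// For example, APInt(16, 0xFFD6) prints as "65494" when Signed is false and
+// as "-42" when Signed is true; with Radix 16 and formatAsCLiteral set, the
+// unsigned form is "0xFFD6".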
+
+/// toString - This returns the APInt as a std::string. Note that this is an
+/// inefficient method. It is better to pass in a SmallVector/SmallString
+/// to the methods above.
+std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
+ SmallString<40> S;
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
+ return S.str();
+}
+
+
+void APInt::dump() const {
+ SmallString<40> S, U;
+ this->toStringUnsigned(U);
+ this->toStringSigned(S);
+ dbgs() << "APInt(" << BitWidth << "b, "
+ << U.str() << "u " << S.str() << "s)";
+}
+
+void APInt::print(raw_ostream &OS, bool isSigned) const {
+ SmallString<40> S;
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
+ OS << S.str();
+}
+
+// This implements a variety of operations on a representation of
+// arbitrary precision, two's-complement, bignum integer values.
+
+// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+// and unrestricting assumption.
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
+
+/* Some handy functions local to this file. */
+namespace {
+
+ /* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+ static inline integerPart
+ lowBitMask(unsigned int bits)
+ {
+ assert(bits != 0 && bits <= integerPartWidth);
+
+ return ~(integerPart) 0 >> (integerPartWidth - bits);
+ }
+
+ /* Returns the value of the lower half of PART. */
+ static inline integerPart
+ lowHalf(integerPart part)
+ {
+ return part & lowBitMask(integerPartWidth / 2);
+ }
+
+ /* Returns the value of the upper half of PART. */
+ static inline integerPart
+ highHalf(integerPart part)
+ {
+ return part >> (integerPartWidth / 2);
+ }
+
+ /* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partMSB(integerPart value)
+ {
+ unsigned int n, msb;
+
+ if (value == 0)
+ return -1U;
+
+ n = integerPartWidth / 2;
+
+ msb = 0;
+ do {
+ if (value >> n) {
+ value >>= n;
+ msb += n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return msb;
+ }
+
+ /* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partLSB(integerPart value)
+ {
+ unsigned int n, lsb;
+
+ if (value == 0)
+ return -1U;
+
+ lsb = integerPartWidth - 1;
+ n = integerPartWidth / 2;
+
+ do {
+ if (value << n) {
+ value <<= n;
+ lsb -= n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return lsb;
+ }
+}
+
+/* Sets the least significant part of a bignum to the input value, and
+ zeroes out higher parts. */
+void
+APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(parts > 0);
+
+ dst[0] = part;
+ for (i = 1; i < parts; i++)
+ dst[i] = 0;
+}
+
+/* Assign one bignum to another. */
+void
+APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = src[i];
+}
+
+/* Returns true if a bignum is zero, false otherwise. */
+bool
+APInt::tcIsZero(const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (src[i])
+ return false;
+
+ return true;
+}
+
+/* Extract the given bit of a bignum; returns 0 or 1. */
+int
+APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
+{
+ return (parts[bit / integerPartWidth] &
+ ((integerPart) 1 << bit % integerPartWidth)) != 0;
+}
+
+/* Set the given bit of a bignum. */
+void
+APInt::tcSetBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
+}
+
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] &=
+ ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
+/* Returns the bit number of the least significant set bit of a
+ number. If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcLSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int i, lsb;
+
+ for (i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ lsb = partLSB(parts[i]);
+
+ return lsb + i * integerPartWidth;
+ }
+ }
+
+ return -1U;
+}
+
+/* Returns the bit number of the most significant set bit of a number.
+ If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcMSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int msb;
+
+ do {
+ --n;
+
+ if (parts[n] != 0) {
+ msb = partMSB(parts[n]);
+
+ return msb + n * integerPartWidth;
+ }
+ } while (n);
+
+ return -1U;
+}
+
+/* Copy the bit vector of width srcBITS from SRC, starting at bit
+ srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
+ the least significant bit of DST. All high bits above srcBITS in
+ DST are zero-filled. */
+void
+APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
+ unsigned int srcBits, unsigned int srcLSB)
+{
+ unsigned int firstSrcPart, dstParts, shift, n;
+
+ dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
+ assert(dstParts <= dstCount);
+
+ firstSrcPart = srcLSB / integerPartWidth;
+ tcAssign (dst, src + firstSrcPart, dstParts);
+
+ shift = srcLSB % integerPartWidth;
+ tcShiftRight (dst, dstParts, shift);
+
+ /* We now have (dstParts * integerPartWidth - shift) bits from SRC
+ in DST. If this is less than srcBits, append the rest, else
+ clear the high bits. */
+ n = dstParts * integerPartWidth - shift;
+ if (n < srcBits) {
+ integerPart mask = lowBitMask (srcBits - n);
+ dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
+ << n % integerPartWidth);
+ } else if (n > srcBits) {
+ if (srcBits % integerPartWidth)
+ dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
+ }
+
+ /* Clear high parts. */
+ while (dstParts < dstCount)
+ dst[dstParts++] = 0;
+}
+
+/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcAdd(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] += rhs[i] + 1;
+ c = (dst[i] <= l);
+ } else {
+ dst[i] += rhs[i];
+ c = (dst[i] < l);
+ }
+ }
+
+ return c;
+}
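+/* For example, with two-part operands the carry propagates across parts:
+     integerPart a[2] = { ~(integerPart) 0, 0 };
+     integerPart b[2] = { 1, 0 };
+     APInt::tcAdd(a, b, 0, 2);  a becomes { 0, 1 } and the returned carry is 0. */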
+
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for (i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] -= rhs[i] + 1;
+ c = (dst[i] >= l);
+ } else {
+ dst[i] -= rhs[i];
+ c = (dst[i] > l);
+ }
+ }
+
+ return c;
+}
+
+/* Negate a bignum in-place. */
+void
+APInt::tcNegate(integerPart *dst, unsigned int parts)
+{
+ tcComplement(dst, parts);
+ tcIncrement(dst, parts);
+}
+
+/* DST += SRC * MULTIPLIER + CARRY if add is true
+ DST = SRC * MULTIPLIER + CARRY if add is false
+
+ Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+ they must start at the same point, i.e. DST == SRC.
+
+ If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+ returned. Otherwise DST is filled with the least significant
+ DSTPARTS parts of the result, and if all of the omitted higher
+ parts were zero return zero, otherwise overflow occurred and
+ return one. */
+int
+APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
+ integerPart multiplier, integerPart carry,
+ unsigned int srcParts, unsigned int dstParts,
+ bool add)
+{
+ unsigned int i, n;
+
+ /* Otherwise our writes of DST kill our later reads of SRC. */
+ assert(dst <= src || dst >= src + srcParts);
+ assert(dstParts <= srcParts + 1);
+
+ /* N loops; minimum of dstParts and srcParts. */
+ n = dstParts < srcParts ? dstParts: srcParts;
+
+ for (i = 0; i < n; i++) {
+ integerPart low, mid, high, srcPart;
+
+ /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+
+ This cannot overflow, because
+
+ (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+
+ which is less than n^2. */
+
+ srcPart = src[i];
+
+ if (multiplier == 0 || srcPart == 0) {
+ low = carry;
+ high = 0;
+ } else {
+ low = lowHalf(srcPart) * lowHalf(multiplier);
+ high = highHalf(srcPart) * highHalf(multiplier);
+
+ mid = lowHalf(srcPart) * highHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ mid = highHalf(srcPart) * lowHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ /* Now add carry. */
+ if (low + carry < low)
+ high++;
+ low += carry;
+ }
+
+ if (add) {
+ /* And now DST[i], and store the new low part there. */
+ if (low + dst[i] < low)
+ high++;
+ dst[i] += low;
+ } else
+ dst[i] = low;
+
+ carry = high;
+ }
+
+ if (i < dstParts) {
+ /* Full multiplication, there is no overflow. */
+ assert(i + 1 == dstParts);
+ dst[i] = carry;
+ return 0;
+ } else {
+ /* We overflowed if there is carry. */
+ if (carry)
+ return 1;
+
+ /* We would overflow if any significant unwritten parts would be
+ non-zero. This is true if any remaining src parts are non-zero
+ and the multiplier is non-zero. */
+ if (multiplier)
+ for (; i < srcParts; i++)
+ if (src[i])
+ return 1;
+
+ /* We fitted in the narrow destination. */
+ return 0;
+ }
+}
+
+/* DST = LHS * RHS, where DST has the same width as the operands and
+ is filled with the least significant parts of the result. Returns
+ one if overflow occurred, otherwise zero. DST must be disjoint
+ from both operands. */
+int
+APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+ int overflow;
+
+ assert(dst != lhs && dst != rhs);
+
+ overflow = 0;
+ tcSet(dst, 0, parts);
+
+ for (i = 0; i < parts; i++)
+ overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
+ parts - i, true);
+
+ return overflow;
+}
+
+/* DST = LHS * RHS, where DST has width the sum of the widths of the
+ operands. No overflow occurs. DST must be disjoint from both
+ operands. Returns the number of parts required to hold the
+ result. */
+unsigned int
+APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int lhsParts,
+ unsigned int rhsParts)
+{
+ /* Put the narrower number on the LHS for fewer loops below. */
+ if (lhsParts > rhsParts) {
+ return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
+ } else {
+ unsigned int n;
+
+ assert(dst != lhs && dst != rhs);
+
+ tcSet(dst, 0, rhsParts);
+
+ for (n = 0; n < lhsParts; n++)
+ tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
+
+ n = lhsParts + rhsParts;
+
+ return n - (dst[n - 1] == 0);
+ }
+}
+
+/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
+ Otherwise set LHS to LHS / RHS with the fractional part discarded,
+ set REMAINDER to the remainder, return zero. i.e.
+
+ OLD_LHS = RHS * LHS + REMAINDER
+
+ SCRATCH is a bignum of the same size as the operands and result for
+ use by the routine; its contents need not be initialized and are
+ destroyed. LHS, REMAINDER and SCRATCH must be distinct.
+*/
+int
+APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
+ integerPart *remainder, integerPart *srhs,
+ unsigned int parts)
+{
+ unsigned int n, shiftCount;
+ integerPart mask;
+
+ assert(lhs != remainder && lhs != srhs && remainder != srhs);
+
+ shiftCount = tcMSB(rhs, parts) + 1;
+ if (shiftCount == 0)
+ return true;
+
+ shiftCount = parts * integerPartWidth - shiftCount;
+ n = shiftCount / integerPartWidth;
+ mask = (integerPart) 1 << (shiftCount % integerPartWidth);
+
+ tcAssign(srhs, rhs, parts);
+ tcShiftLeft(srhs, parts, shiftCount);
+ tcAssign(remainder, lhs, parts);
+ tcSet(lhs, 0, parts);
+
+ /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
+ the total. */
+ for (;;) {
+ int compare;
+
+ compare = tcCompare(remainder, srhs, parts);
+ if (compare >= 0) {
+ tcSubtract(remainder, srhs, 0, parts);
+ lhs[n] |= mask;
+ }
+
+ if (shiftCount == 0)
+ break;
+ shiftCount--;
+ tcShiftRight(srhs, parts, 1);
+ if ((mask >>= 1) == 0)
+ mask = (integerPart) 1 << (integerPartWidth - 1), n--;
+ }
+
+ return false;
+}
+
+/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
+ There are no restrictions on COUNT. */
+void
+APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int jump, shift;
+
+ /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ while (parts > jump) {
+ integerPart part;
+
+ parts--;
+
+ /* dst[i] comes from the two parts src[i - jump] and, if we have
+ an intra-part shift, src[i - jump - 1]. */
+ part = dst[parts - jump];
+ if (shift) {
+ part <<= shift;
+ if (parts >= jump + 1)
+ part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
+ }
+
+ dst[parts] = part;
+ }
+
+ while (parts > 0)
+ dst[--parts] = 0;
+ }
+}
+
+/* Shift a bignum right COUNT bits in-place. Shifted in bits are
+ zero. There are no restrictions on COUNT. */
+void
+APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int i, jump, shift;
+
+ /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ /* Perform the shift. This leaves the most significant COUNT bits
+ of the result at zero. */
+ for (i = 0; i < parts; i++) {
+ integerPart part;
+
+ if (i + jump >= parts) {
+ part = 0;
+ } else {
+ part = dst[i + jump];
+ if (shift) {
+ part >>= shift;
+ if (i + jump + 1 < parts)
+ part |= dst[i + jump + 1] << (integerPartWidth - shift);
+ }
+ }
+
+ dst[i] = part;
+ }
+ }
+}
+
+/* Bitwise and of two bignums. */
+void
+APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] &= rhs[i];
+}
+
+/* Bitwise inclusive or of two bignums. */
+void
+APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] |= rhs[i];
+}
+
+/* Bitwise exclusive or of two bignums. */
+void
+APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] ^= rhs[i];
+}
+
+/* Complement a bignum in-place. */
+void
+APInt::tcComplement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
+/* Comparison (unsigned) of two bignums. */
+int
+APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
+ unsigned int parts)
+{
+ while (parts) {
+ parts--;
+ if (lhs[parts] == rhs[parts])
+ continue;
+
+ if (lhs[parts] > rhs[parts])
+ return 1;
+ else
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Increment a bignum in-place, return the carry flag. */
+integerPart
+APInt::tcIncrement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for (i = 0; i < parts; i++)
+ if (++dst[i] != 0)
+ break;
+
+ return i == parts;
+}
+
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+void
+APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
+ unsigned int bits)
+{
+ unsigned int i;
+
+ i = 0;
+ while (bits > integerPartWidth) {
+ dst[i++] = ~(integerPart) 0;
+ bits -= integerPartWidth;
+ }
+
+ if (bits)
+ dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
diff --git a/contrib/llvm/lib/Support/APSInt.cpp b/contrib/llvm/lib/Support/APSInt.cpp
new file mode 100644
index 000000000000..73acafa690c7
--- /dev/null
+++ b/contrib/llvm/lib/Support/APSInt.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the APSInt class, which is a simple class that
+// represents an arbitrary-sized integer that knows its signedness.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+
+using namespace llvm;
+
+void APSInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0));
+ APInt::Profile(ID);
+}
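+
+// Illustrative sketch (hypothetical caller): Profile() is what lets APSInt
+// values be uniqued in FoldingSet-based containers, e.g.
+//
+//   FoldingSetNodeID ID;
+//   SomeAPSInt.Profile(ID);   // folds signedness first, then the value bits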
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
new file mode 100644
index 000000000000..215b0f249d96
--- /dev/null
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -0,0 +1,188 @@
+//===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BumpPtrAllocator interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Recycler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstring>
+
+namespace llvm {
+
+BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
+ SlabAllocator &allocator)
+ : SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
+ CurSlab(0), BytesAllocated(0) { }
+
+BumpPtrAllocator::~BumpPtrAllocator() {
+ DeallocateSlabs(CurSlab);
+}
+
+/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should
+/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and
+/// AlignPtr(8, 4) == 8.
+char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
+ assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
+ "Alignment is not a power of two!");
+
+ // Do the alignment.
+ return (char*)(((uintptr_t)Ptr + Alignment - 1) &
+ ~(uintptr_t)(Alignment - 1));
+}
+
+/// StartNewSlab - Allocate a new slab and move the bump pointers over into
+/// the new slab. Modifies CurPtr and End.
+void BumpPtrAllocator::StartNewSlab() {
+ // If we have already allocated a large number of slabs, it's likely we're
+ // going to allocate more. Increase the slab size to reduce mallocs and
+ // possibly memory overhead. The factors are chosen conservatively to avoid
+ // overallocation.
+ if (BytesAllocated >= SlabSize * 128)
+ SlabSize *= 2;
+
+ MemSlab *NewSlab = Allocator.Allocate(SlabSize);
+ NewSlab->NextPtr = CurSlab;
+ CurSlab = NewSlab;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// DeallocateSlabs - Deallocate all memory slabs after and including this
+/// one.
+void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) {
+ while (Slab) {
+ MemSlab *NextSlab = Slab->NextPtr;
+#ifndef NDEBUG
+ // Poison the memory so stale pointers crash sooner. Note we must
+ // preserve the Size and NextPtr fields at the beginning.
+ sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab));
+ memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab));
+#endif
+ Allocator.Deallocate(Slab);
+ Slab = NextSlab;
+ }
+}
+
+/// Reset - Deallocate all but the current slab and reset the current pointer
+/// to the beginning of it, freeing all memory allocated so far.
+void BumpPtrAllocator::Reset() {
+ if (!CurSlab)
+ return;
+ DeallocateSlabs(CurSlab->NextPtr);
+ CurSlab->NextPtr = 0;
+ CurPtr = (char*)(CurSlab + 1);
+ End = ((char*)CurSlab) + CurSlab->Size;
+}
+
+/// Allocate - Allocate space at the specified alignment.
+///
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
+ if (!CurSlab) // Start a new slab if we haven't allocated one already.
+ StartNewSlab();
+
+ // Keep track of how many bytes we've allocated.
+ BytesAllocated += Size;
+
+ // 0-byte alignment means 1-byte alignment.
+ if (Alignment == 0) Alignment = 1;
+
+ // Allocate the aligned space, going forwards from CurPtr.
+ char *Ptr = AlignPtr(CurPtr, Alignment);
+
+ // Check if we can hold it.
+ if (Ptr + Size <= End) {
+ CurPtr = Ptr + Size;
+ return Ptr;
+ }
+
+ // If Size is really big, allocate a separate slab for it.
+ size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1;
+ if (PaddedSize > SizeThreshold) {
+ MemSlab *NewSlab = Allocator.Allocate(PaddedSize);
+
+ // Put the new slab after the current slab, since we are not allocating
+ // into it.
+ NewSlab->NextPtr = CurSlab->NextPtr;
+ CurSlab->NextPtr = NewSlab;
+
+ Ptr = AlignPtr((char*)(NewSlab + 1), Alignment);
+ assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size);
+ return Ptr;
+ }
+
+ // Otherwise, start a new slab and try again.
+ StartNewSlab();
+ Ptr = AlignPtr(CurPtr, Alignment);
+ CurPtr = Ptr + Size;
+ assert(CurPtr <= End && "Unable to allocate memory!");
+ return Ptr;
+}
+
+unsigned BumpPtrAllocator::GetNumSlabs() const {
+ unsigned NumSlabs = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ ++NumSlabs;
+ }
+ return NumSlabs;
+}
+
+size_t BumpPtrAllocator::getTotalMemory() const {
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ }
+ return TotalMemory;
+}
+
+void BumpPtrAllocator::PrintStats() const {
+ unsigned NumSlabs = 0;
+ size_t TotalMemory = 0;
+ for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+ TotalMemory += Slab->Size;
+ ++NumSlabs;
+ }
+
+ errs() << "\nNumber of memory regions: " << NumSlabs << '\n'
+ << "Bytes used: " << BytesAllocated << '\n'
+ << "Bytes allocated: " << TotalMemory << '\n'
+ << "Bytes wasted: " << (TotalMemory - BytesAllocated)
+ << " (includes alignment, etc)\n";
+}
+
+MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator =
+ MallocSlabAllocator();
+
+SlabAllocator::~SlabAllocator() { }
+
+MallocSlabAllocator::~MallocSlabAllocator() { }
+
+MemSlab *MallocSlabAllocator::Allocate(size_t Size) {
+ MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0);
+ Slab->Size = Size;
+ Slab->NextPtr = 0;
+ return Slab;
+}
+
+void MallocSlabAllocator::Deallocate(MemSlab *Slab) {
+ Allocator.Deallocate(Slab);
+}
+
+void PrintRecyclerStats(size_t Size,
+ size_t Align,
+ size_t FreeListSize) {
+ errs() << "Recycler element size: " << Size << '\n'
+ << "Recycler element alignment: " << Align << '\n'
+ << "Number of elements free for recycling: " << FreeListSize << '\n';
+}
+
+}
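+
+// Illustrative usage sketch (hypothetical caller and type; default slab sizes
+// come from the header):
+//
+//   BumpPtrAllocator BPA;
+//   void *Mem = BPA.Allocate(sizeof(Foo), 8);  // 8-byte-aligned raw memory
+//   Foo *F = new (Mem) Foo();                  // placement-new; never free()d
+//   BPA.Reset();                               // recycle the current slab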
diff --git a/contrib/llvm/lib/Support/Atomic.cpp b/contrib/llvm/lib/Support/Atomic.cpp
new file mode 100644
index 000000000000..8214521f9827
--- /dev/null
+++ b/contrib/llvm/lib/Support/Atomic.cpp
@@ -0,0 +1,112 @@
+//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Atomic.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+void sys::MemoryFence() {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ return;
+#else
+# if defined(__GNUC__)
+ __sync_synchronize();
+# elif defined(_MSC_VER)
+ MemoryBarrier();
+# else
+# error No memory fence implementation for your platform!
+# endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+ sys::cas_flag new_value,
+ sys::cas_flag old_value) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ sys::cas_flag result = *ptr;
+ if (result == old_value)
+ *ptr = new_value;
+ return result;
+#elif defined(__GNUC__)
+ return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+ return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+# error No compare-and-swap implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ ++(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedIncrement(ptr);
+#else
+# error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ --(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedDecrement(ptr);
+#else
+# error No atomic decrement implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+#if !defined(LLVM_MULTITHREADED) || LLVM_MULTITHREADED == 0
+ *ptr += val;
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, val);
+#elif defined(_MSC_VER)
+ return InterlockedExchangeAdd(ptr, val) + val;
+#else
+# error No atomic add implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original * val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
+
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+ sys::cas_flag original, result;
+ do {
+ original = *ptr;
+ result = original / val;
+ } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+ return result;
+}
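+
+// Illustrative sketch: the compare-and-swap loop used by AtomicMul/AtomicDiv
+// generalizes to any read-modify-write, e.g. a hypothetical atomic maximum:
+//
+//   sys::cas_flag old_val, desired;
+//   do {
+//     old_val = *ptr;
+//     desired = (val > old_val) ? val : old_val;
+//   } while (sys::CompareAndSwap(ptr, desired, old_val) != old_val);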
diff --git a/contrib/llvm/lib/Support/BranchProbability.cpp b/contrib/llvm/lib/Support/BranchProbability.cpp
new file mode 100644
index 000000000000..97342da3f078
--- /dev/null
+++ b/contrib/llvm/lib/Support/BranchProbability.cpp
@@ -0,0 +1,44 @@
+//===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BranchProbability class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+BranchProbability::BranchProbability(uint32_t n, uint32_t d) {
+ assert(d > 0 && "Denominator cannot be 0!");
+ assert(n <= d && "Probability cannot be bigger than 1!");
+ N = n;
+ D = d;
+}
+
+raw_ostream &BranchProbability::print(raw_ostream &OS) const {
+ OS << N << " / " << D << " = " << ((double)N / D);
+ return OS;
+}
+
+void BranchProbability::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
+ Prob.print(OS);
+ return OS;
+}
+
+}
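+
+// Illustrative sketch (hypothetical caller):
+//
+//   BranchProbability TakenProb(3, 4);  // the branch is taken 3 times in 4
+//   dbgs() << TakenProb << "\n";        // prints "3 / 4 = 0.75"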
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
new file mode 100644
index 000000000000..92c60a9631f7
--- /dev/null
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -0,0 +1,1426 @@
+//===-- CommandLine.cpp - Command line parser implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a command line argument processor that is useful when
+// creating a tool. It provides a simple, minimalistic interface that is easily
+// extensible and supports nonlocal (library) command line options.
+//
+// Note that rather than trying to figure out what this code does, you could try
+// reading the library documentation located in docs/CommandLine.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include <cerrno>
+#include <cstdlib>
+using namespace llvm;
+using namespace cl;
+
+//===----------------------------------------------------------------------===//
+// Template instantiations and anchors.
+//
+namespace llvm { namespace cl {
+TEMPLATE_INSTANTIATION(class basic_parser<bool>);
+TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
+TEMPLATE_INSTANTIATION(class basic_parser<int>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<double>);
+TEMPLATE_INSTANTIATION(class basic_parser<float>);
+TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+TEMPLATE_INSTANTIATION(class opt<unsigned>);
+TEMPLATE_INSTANTIATION(class opt<int>);
+TEMPLATE_INSTANTIATION(class opt<std::string>);
+TEMPLATE_INSTANTIATION(class opt<char>);
+TEMPLATE_INSTANTIATION(class opt<bool>);
+} } // end namespace llvm::cl
+
+void Option::anchor() {}
+void basic_parser_impl::anchor() {}
+void parser<bool>::anchor() {}
+void parser<boolOrDefault>::anchor() {}
+void parser<int>::anchor() {}
+void parser<unsigned>::anchor() {}
+void parser<double>::anchor() {}
+void parser<float>::anchor() {}
+void parser<std::string>::anchor() {}
+void parser<char>::anchor() {}
+
+//===----------------------------------------------------------------------===//
+
+// Globals for name and overview of program. Program name is not a string to
+// avoid static ctor/dtor issues.
+static char ProgramName[80] = "<premain>";
+static const char *ProgramOverview = 0;
+
+// This collects additional help to be printed.
+static ManagedStatic<std::vector<const char*> > MoreHelp;
+
+extrahelp::extrahelp(const char *Help)
+ : morehelp(Help) {
+ MoreHelp->push_back(Help);
+}
+
+static bool OptionListChanged = false;
+
+// MarkOptionsChanged - Internal helper function.
+void cl::MarkOptionsChanged() {
+ OptionListChanged = true;
+}
+
+/// RegisteredOptionList - This is the list of the command line options that
+/// have statically constructed themselves.
+static Option *RegisteredOptionList = 0;
+
+void Option::addArgument() {
+ assert(NextRegistered == 0 && "argument multiply registered!");
+
+ NextRegistered = RegisteredOptionList;
+ RegisteredOptionList = this;
+ MarkOptionsChanged();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
+/// GetOptionInfo - Scan the list of registered options, turning them into data
+/// structures that are easier to handle.
+static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
+ SmallVectorImpl<Option*> &SinkOpts,
+ StringMap<Option*> &OptionsMap) {
+ SmallVector<const char*, 16> OptionNames;
+ Option *CAOpt = 0; // The ConsumeAfter option if it exists.
+ for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
+ // If this option wants to handle multiple option names, get the full set.
+ // This handles enum options like "-O1 -O2" etc.
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ // Handle named options.
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ // Add argument to the argument map!
+ if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+ errs() << ProgramName << ": CommandLine Error: Argument '"
+ << OptionNames[i] << "' defined more than once!\n";
+ }
+ }
+
+ OptionNames.clear();
+
+ // Remember information about positional options.
+ if (O->getFormattingFlag() == cl::Positional)
+ PositionalOpts.push_back(O);
+ else if (O->getMiscFlags() & cl::Sink) // Remember sink options
+ SinkOpts.push_back(O);
+ else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ if (CAOpt)
+ O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+ CAOpt = O;
+ }
+ }
+
+ if (CAOpt)
+ PositionalOpts.push_back(CAOpt);
+
+ // Make sure that they are in order of registration not backwards.
+ std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+}
+
+
+/// LookupOption - Look up the option specified by the given argument string on
+/// the command line. If there is a value specified (after an equal sign),
+/// return that as well. This assumes that leading dashes have already been
+/// stripped.
+static Option *LookupOption(StringRef &Arg, StringRef &Value,
+ const StringMap<Option*> &OptionsMap) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ size_t EqualPos = Arg.find('=');
+
+ // If we have an equals sign, remember the value.
+ if (EqualPos == StringRef::npos) {
+ // Look up the option.
+ StringMap<Option*>::const_iterator I = OptionsMap.find(Arg);
+ return I != OptionsMap.end() ? I->second : 0;
+ }
+
+ // If the argument before the = is a valid option name, we match. If not,
+ // return Arg unmolested.
+ StringMap<Option*>::const_iterator I =
+ OptionsMap.find(Arg.substr(0, EqualPos));
+ if (I == OptionsMap.end()) return 0;
+
+ Value = Arg.substr(EqualPos+1);
+ Arg = Arg.substr(0, EqualPos);
+ return I->second;
+}
+
+/// LookupNearestOption - Look up the closest match to the option specified by
+/// the given argument string on the command line. If there is a value
+/// specified (after an equal sign), return that as well. This assumes that
+/// leading dashes have already been stripped.
+static Option *LookupNearestOption(StringRef Arg,
+ const StringMap<Option*> &OptionsMap,
+ std::string &NearestString) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ // Split on any equal sign.
+ std::pair<StringRef, StringRef> SplitArg = Arg.split('=');
+ StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present.
+ StringRef &RHS = SplitArg.second;
+
+ // Find the closest match.
+ Option *Best = 0;
+ unsigned BestDistance = 0;
+ for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
+ ie = OptionsMap.end(); it != ie; ++it) {
+ Option *O = it->second;
+ SmallVector<const char*, 16> OptionNames;
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed;
+ StringRef Flag = PermitValue ? LHS : Arg;
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ StringRef Name = OptionNames[i];
+ unsigned Distance = StringRef(Name).edit_distance(
+ Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+ if (!Best || Distance < BestDistance) {
+ Best = O;
+ BestDistance = Distance;
+ if (RHS.empty() || !PermitValue)
+ NearestString = OptionNames[i];
+ else
+ NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
+ }
+ }
+ }
+
+ return Best;
+}
+
+/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurrence()
+/// that does special handling of cl::CommaSeparated options.
+static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
+ StringRef ArgName,
+ StringRef Value, bool MultiArg = false)
+{
+ // Check to see if this option accepts a comma separated list of values. If
+ // it does, we have to split up the value into multiple values.
+ if (Handler->getMiscFlags() & CommaSeparated) {
+ StringRef Val(Value);
+ StringRef::size_type Pos = Val.find(',');
+
+ while (Pos != StringRef::npos) {
+ // Process the portion before the comma.
+ if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg))
+ return true;
+ // Erase the portion before the comma, AND the comma.
+ Val = Val.substr(Pos+1);
+ Value = Value.substr(Pos+1); // Advance the original value as well.
+ // Check for another comma.
+ Pos = Val.find(',');
+ }
+
+ Value = Val;
+ }
+
+ if (Handler->addOccurrence(pos, ArgName, Value, MultiArg))
+ return true;
+
+ return false;
+}
+
+/// ProvideOption - For Value, this differentiates between an empty value ("")
+/// and a null value (StringRef()). The latter is accepted for arguments that
+/// don't allow a value (-foo); the former is rejected (-foo=).
+static inline bool ProvideOption(Option *Handler, StringRef ArgName,
+ StringRef Value, int argc, char **argv,
+ int &i) {
+ // Is this a multi-argument option?
+ unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
+
+ // Enforce value requirements
+ switch (Handler->getValueExpectedFlag()) {
+ case ValueRequired:
+ if (Value.data() == 0) { // No value specified?
+ if (i+1 >= argc)
+ return Handler->error("requires a value!");
+ // Steal the next argument, like for '-o filename'
+ Value = argv[++i];
+ }
+ break;
+ case ValueDisallowed:
+ if (NumAdditionalVals > 0)
+ return Handler->error("multi-valued option specified"
+ " with ValueDisallowed modifier!");
+
+ if (Value.data())
+ return Handler->error("does not allow a value! '" +
+ Twine(Value) + "' specified.");
+ break;
+ case ValueOptional:
+ break;
+
+ default:
+ errs() << ProgramName
+ << ": Bad ValueMask flag! CommandLine usage error:"
+ << Handler->getValueExpectedFlag() << "\n";
+ llvm_unreachable(0);
+ }
+
+ // If this isn't a multi-arg option, just run the handler.
+ if (NumAdditionalVals == 0)
+ return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value);
+
+ // If it is, run the handle several times.
+ bool MultiArg = false;
+
+ if (Value.data()) {
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ --NumAdditionalVals;
+ MultiArg = true;
+ }
+
+ while (NumAdditionalVals > 0) {
+ if (i+1 >= argc)
+ return Handler->error("not enough values!");
+ Value = argv[++i];
+
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
+ return true;
+ MultiArg = true;
+ --NumAdditionalVals;
+ }
+ return false;
+}
+
+static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
+ int Dummy = i;
+ return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy);
+}
+
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+ return O->getFormattingFlag() == cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+}
+
+// getOptionPred - Check to see if there are any options that satisfy the
+// specified predicate with names that are the prefixes in Name. This is
+// checked by progressively stripping characters off of the name, checking to
+// see if there are options that satisfy the predicate. If we find one, return
+// it; otherwise return null.
+//
+static Option *getOptionPred(StringRef Name, size_t &Length,
+ bool (*Pred)(const Option*),
+ const StringMap<Option*> &OptionsMap) {
+
+ StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name);
+
+ // Loop while we haven't found an option and Name still has at least two
+ // characters in it (so that the next iteration will not be the empty
+ // string).
+ while (OMI == OptionsMap.end() && Name.size() > 1) {
+ Name = Name.substr(0, Name.size()-1); // Chop off the last character.
+ OMI = OptionsMap.find(Name);
+ }
+
+ if (OMI != OptionsMap.end() && Pred(OMI->second)) {
+ Length = Name.size();
+ return OMI->second; // Found one!
+ }
+ return 0; // No option found!
+}
+
+/// HandlePrefixedOrGroupedOption - The specified argument string (which started
+/// with at least one '-') does not fully match an available option. Check to
+/// see if this is a prefix or grouped option. If so, split the argument into
+/// an Arg/Value pair and return the Option to parse it with.
+static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
+ bool &ErrorParsing,
+ const StringMap<Option*> &OptionsMap) {
+ if (Arg.size() == 1) return 0;
+
+ // Do the lookup!
+ size_t Length = 0;
+ Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
+ if (PGOpt == 0) return 0;
+
+ // If the option is a prefixed option, then the value is simply the
+ // rest of the name... so fall through to later processing, by
+ // setting up the argument name flags and value fields.
+ if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ Value = Arg.substr(Length);
+ Arg = Arg.substr(0, Length);
+ assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
+ return PGOpt;
+ }
+
+ // This must be a grouped option... handle them now. Grouping options can't
+ // have values.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ do {
+ // Move current arg name out of Arg into OneArgName.
+ StringRef OneArgName = Arg.substr(0, Length);
+ Arg = Arg.substr(Length);
+
+ // Because ValueRequired is an invalid flag for grouped arguments,
+ // we don't need to pass argc/argv in.
+ assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
+ "Option can not be cl::Grouping AND cl::ValueRequired!");
+ int Dummy = 0;
+ ErrorParsing |= ProvideOption(PGOpt, OneArgName,
+ StringRef(), 0, 0, Dummy);
+
+ // Get the next grouping option.
+ PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
+ } while (PGOpt && Length != Arg.size());
+
+ // Return the last option with Arg cut down to just the last one.
+ return PGOpt;
+}
+
+
+
+static bool RequiresValue(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::Required ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+static bool EatsUnboundedNumberOfValues(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::ZeroOrMore ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+/// ParseCStringVector - Break INPUT up wherever one or more
+/// whitespace characters are found, and store the resulting tokens in
+/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated on the
+/// heap, so it is the caller's responsibility to free()
+/// them later.
+///
+static void ParseCStringVector(std::vector<char *> &OutputVector,
+ const char *Input) {
+ // Characters which will be treated as token separators:
+ StringRef Delims = " \v\f\t\r\n";
+
+ StringRef WorkStr(Input);
+ while (!WorkStr.empty()) {
+ // If the first character is a delimiter, strip them off.
+ if (Delims.find(WorkStr[0]) != StringRef::npos) {
+ size_t Pos = WorkStr.find_first_not_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+ WorkStr = WorkStr.substr(Pos);
+ continue;
+ }
+
+ // Find position of first delimiter.
+ size_t Pos = WorkStr.find_first_of(Delims);
+ if (Pos == StringRef::npos) Pos = WorkStr.size();
+
+ // Everything from 0 to Pos is the next word to copy.
+ char *NewStr = (char*)malloc(Pos+1);
+ memcpy(NewStr, WorkStr.data(), Pos);
+ NewStr[Pos] = 0;
+ OutputVector.push_back(NewStr);
+
+ WorkStr = WorkStr.substr(Pos);
+ }
+}
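+
+// Illustrative sketch: for Input = "  -O2\t-o\nout.bc ", the loop above pushes
+// three heap-allocated tokens onto OutputVector -- "-O2", "-o" and "out.bc" --
+// which the caller must later free().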
+
+/// ParseEnvironmentOptions - An alternative entry point to the
+/// CommandLine library, which allows you to read the program's name
+/// from the caller (as PROGNAME) and its command-line arguments from
+/// an environment variable (whose name is given in ENVVAR).
+///
+void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
+ const char *Overview, bool ReadResponseFiles) {
+ // Check args.
+ assert(progName && "Program name not specified");
+ assert(envVar && "Environment variable name missing");
+
+ // Get the environment variable they want us to parse options out of.
+ const char *envValue = getenv(envVar);
+ if (!envValue)
+ return;
+
+ // Get program's "name", which we wouldn't know without the caller
+ // telling us.
+ std::vector<char*> newArgv;
+ newArgv.push_back(strdup(progName));
+
+ // Parse the value of the environment variable into a "command line"
+ // and hand it off to ParseCommandLineOptions().
+ ParseCStringVector(newArgv, envValue);
+ int newArgc = static_cast<int>(newArgv.size());
+ ParseCommandLineOptions(newArgc, &newArgv[0], Overview, ReadResponseFiles);
+
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+}
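+
+// Illustrative sketch (hypothetical tool and variable name): with
+// MYTOOL_OPTIONS="-stats -time-passes" set in the environment, a tool can pick
+// those flags up at startup via
+//
+//   cl::ParseEnvironmentOptions("mytool", "MYTOOL_OPTIONS", "my tool overview");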
+
+
+/// ExpandResponseFiles - Copy the contents of argv into newArgv,
+/// substituting the contents of the response files for the arguments
+/// of type @file.
+static void ExpandResponseFiles(unsigned argc, char** argv,
+ std::vector<char*>& newArgv) {
+ for (unsigned i = 1; i != argc; ++i) {
+ char *arg = argv[i];
+
+ if (arg[0] == '@') {
+ sys::PathWithStatus respFile(++arg);
+
+ // Check that the response file is not empty (mmap'ing empty
+ // files can be problematic).
+ const sys::FileStatus *FileStat = respFile.getFileStatus();
+ if (FileStat && FileStat->getSize() != 0) {
+
+ // If we could open the file, parse its contents, otherwise
+ // pass the @file option verbatim.
+
+ // TODO: we should also support recursive loading of response files,
+ // since this is how gcc behaves. (From their man page: "The file may
+ // itself contain additional @file options; any such options will be
+ // processed recursively.")
+
+ // Mmap the response file into memory.
+ OwningPtr<MemoryBuffer> respFilePtr;
+ if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
+ ParseCStringVector(newArgv, respFilePtr->getBufferStart());
+ continue;
+ }
+ }
+ }
+ newArgv.push_back(strdup(arg));
+ }
+}
+
+void cl::ParseCommandLineOptions(int argc, char **argv,
+ const char *Overview, bool ReadResponseFiles) {
+ // Process all registered options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> Opts;
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+
+ assert((!Opts.empty() || !PositionalOpts.empty()) &&
+ "No options specified!");
+
+ // Expand response files.
+ std::vector<char*> newArgv;
+ if (ReadResponseFiles) {
+ newArgv.push_back(strdup(argv[0]));
+ ExpandResponseFiles(argc, argv, newArgv);
+ argv = &newArgv[0];
+ argc = static_cast<int>(newArgv.size());
+ }
+
+ // Copy the program name into ProgName, making sure not to overflow it.
+ std::string ProgName = sys::path::filename(argv[0]);
+ size_t Len = std::min(ProgName.size(), size_t(79));
+ memcpy(ProgramName, ProgName.data(), Len);
+ ProgramName[Len] = '\0';
+
+ ProgramOverview = Overview;
+ bool ErrorParsing = false;
+
+ // Check out the positional arguments to collect information about them.
+ unsigned NumPositionalRequired = 0;
+
+ // Determine whether or not there is an unlimited number of positionals.
+ bool HasUnlimitedPositionals = false;
+
+ Option *ConsumeAfterOpt = 0;
+ if (!PositionalOpts.empty()) {
+ if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ assert(PositionalOpts.size() > 1 &&
+ "Cannot specify cl::ConsumeAfter without a positional argument!");
+ ConsumeAfterOpt = PositionalOpts[0];
+ }
+
+ // Calculate how many positional values are _required_.
+ bool UnboundedFound = false;
+ for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
+ i != e; ++i) {
+ Option *Opt = PositionalOpts[i];
+ if (RequiresValue(Opt))
+ ++NumPositionalRequired;
+ else if (ConsumeAfterOpt) {
+ // ConsumeAfter cannot be combined with "optional" positional options
+ // unless there is only one positional argument...
+ if (PositionalOpts.size() > 2)
+ ErrorParsing |=
+ Opt->error("error - this positional option will never be matched, "
+ "because it does not Require a value, and a "
+ "cl::ConsumeAfter option is active!");
+ } else if (UnboundedFound && !Opt->ArgStr[0]) {
+ // This option does not "require" a value... Make sure this option is
+ // not specified after an option that eats all extra arguments, or this
+ // one will never get any!
+ //
+ ErrorParsing |= Opt->error("error - option can never match, because "
+ "another positional argument will match an "
+ "unbounded number of values, and this option"
+ " does not require a value!");
+ }
+ UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
+ }
+ HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt;
+ }
+
+ // PositionalVals - A vector of "positional" arguments we accumulate into
+ // the process at the end.
+ //
+ SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals;
+
+ // If the program has named positional arguments, and the name has been run
+ // across, keep track of which positional argument was named. Otherwise put
+ // the positional args into the PositionalVals list...
+ Option *ActivePositionalArg = 0;
+
+ // Loop over all of the arguments... processing them.
+ bool DashDashFound = false; // Have we read '--'?
+ for (int i = 1; i < argc; ++i) {
+ Option *Handler = 0;
+ Option *NearestHandler = 0;
+ std::string NearestHandlerString;
+ StringRef Value;
+ StringRef ArgName = "";
+
+ // If the option list changed, this means that some command line
+ // option has just been registered or deregistered. This can occur in
+ // response to things like -load, etc. If this happens, rescan the options.
+ if (OptionListChanged) {
+ PositionalOpts.clear();
+ SinkOpts.clear();
+ Opts.clear();
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+ OptionListChanged = false;
+ }
+
+ // Check to see if this is a positional argument. This argument is
+ // considered to be positional if it doesn't start with '-', if it is "-"
+ // itself, or if we have seen "--" already.
+ //
+ if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
+ // Positional argument!
+ if (ActivePositionalArg) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ if (!PositionalOpts.empty()) {
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+
+ // All of the positional arguments have been fulfilled; give the rest to
+ // the ConsumeAfter option, if it's specified...
+ //
+ if (PositionalVals.size() >= NumPositionalRequired &&
+ ConsumeAfterOpt != 0) {
+ for (++i; i < argc; ++i)
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+ break; // Handle outside of the argument processing loop...
+ }
+
+ // Delay processing positional arguments until the end...
+ continue;
+ }
+ } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 &&
+ !DashDashFound) {
+ DashDashFound = true; // We found the mythical "--".
+ continue; // Don't try to process it as an argument itself.
+ } else if (ActivePositionalArg &&
+ (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) {
+ // If there is a positional argument eating options, check to see if this
+ // option is another positional argument. If so, treat it as an argument,
+ // otherwise feed it to the eating positional.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+ if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ } else { // We start with a '-', must be an argument.
+ ArgName = argv[i]+1;
+ // Eat leading dashes.
+ while (!ArgName.empty() && ArgName[0] == '-')
+ ArgName = ArgName.substr(1);
+
+ Handler = LookupOption(ArgName, Value, Opts);
+
+ // Check to see if this "option" is really a prefixed or grouped argument.
+ if (Handler == 0)
+ Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
+ ErrorParsing, Opts);
+
+ // Otherwise, look for the closest available option to report to the user
+ // in the upcoming error.
+ if (Handler == 0 && SinkOpts.empty())
+ NearestHandler = LookupNearestOption(ArgName, Opts,
+ NearestHandlerString);
+ }
+
+ if (Handler == 0) {
+ if (SinkOpts.empty()) {
+ errs() << ProgramName << ": Unknown command line argument '"
+ << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
+
+ if (NearestHandler) {
+ // If we know a near match, report it as well.
+ errs() << ProgramName << ": Did you mean '-"
+ << NearestHandlerString << "'?\n";
+ }
+
+ ErrorParsing = true;
+ } else {
+ for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
+ E = SinkOpts.end(); I != E ; ++I)
+ (*I)->addOccurrence(i, "", argv[i]);
+ }
+ continue;
+ }
+
+ // If this is a named positional argument, just remember that it is the
+ // active one...
+ if (Handler->getFormattingFlag() == cl::Positional)
+ ActivePositionalArg = Handler;
+ else
+ ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i);
+ }
+
+ // Check and handle positional arguments now...
+ if (NumPositionalRequired > PositionalVals.size()) {
+ errs() << ProgramName
+ << ": Not enough positional command line arguments specified!\n"
+ << "Must specify at least " << NumPositionalRequired
+ << " positional arguments: See: " << argv[0] << " -help\n";
+
+ ErrorParsing = true;
+ } else if (!HasUnlimitedPositionals &&
+ PositionalVals.size() > PositionalOpts.size()) {
+ errs() << ProgramName
+ << ": Too many positional arguments specified!\n"
+ << "Can specify at most " << PositionalOpts.size()
+ << " positional arguments: See: " << argv[0] << " -help\n";
+ ErrorParsing = true;
+
+ } else if (ConsumeAfterOpt == 0) {
+ // Positional args have already been handled if ConsumeAfter is specified.
+ unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
+ for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (RequiresValue(PositionalOpts[i])) {
+ ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ --NumPositionalRequired; // We fulfilled our duty...
+ }
+
+ // If we _can_ give this option more arguments, do so now, as long as we
+ // do not give it values that others need. 'Done' controls whether the
+ // option even _WANTS_ any more.
+ //
+ bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required;
+ while (NumVals-ValNo > NumPositionalRequired && !Done) {
+ switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
+ case cl::Optional:
+ Done = true; // Optional arguments want _at most_ one value
+ // FALL THROUGH
+ case cl::ZeroOrMore: // Zero or more will take all they can get...
+ case cl::OneOrMore: // One or more will take all they can get...
+ ProvidePositionalOption(PositionalOpts[i],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ break;
+ default:
+ llvm_unreachable("Internal error, unexpected NumOccurrences flag in "
+ "positional argument processing!");
+ }
+ }
+ }
+ } else {
+ assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size());
+ unsigned ValNo = 0;
+ for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j)
+ if (RequiresValue(PositionalOpts[j])) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[j],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Handle the case where there is just one positional option, and it's
+ // optional. In this case, we want to give JUST THE FIRST option to the
+ // positional option and keep the rest for the consume after. The above
+ // loop would have assigned no values to positional options in this case.
+ //
+ if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Hand all of the rest of the arguments over to the
+ // cl::ConsumeAfter command line option...
+ for (; ValNo != PositionalVals.size(); ++ValNo)
+ ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt,
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ }
+
+ // Loop over args and make sure all required args are specified!
+ for (StringMap<Option*>::iterator I = Opts.begin(),
+ E = Opts.end(); I != E; ++I) {
+ switch (I->second->getNumOccurrencesFlag()) {
+ case Required:
+ case OneOrMore:
+ if (I->second->getNumOccurrences() == 0) {
+ I->second->error("must be specified at least once!");
+ ErrorParsing = true;
+ }
+ // Fall through
+ default:
+ break;
+ }
+ }
+
+ // Now that we know if -debug is specified, we can use it.
+ // Note that if ReadResponseFiles == true, this must be done before the
+ // memory allocated for the expanded command line is free()d below.
+ DEBUG(dbgs() << "Args: ";
+ for (int i = 0; i < argc; ++i)
+ dbgs() << argv[i] << ' ';
+ dbgs() << '\n';
+ );
+
+ // Free all of the memory allocated to the map. Command line options may only
+ // be processed once!
+ Opts.clear();
+ PositionalOpts.clear();
+ MoreHelp->clear();
+
+ // Free the memory allocated by ExpandResponseFiles.
+ if (ReadResponseFiles) {
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free(*i);
+ }
+
+ // If we had an error processing our arguments, don't let the program execute
+ if (ErrorParsing) exit(1);
+}
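+
+// Illustrative sketch (hypothetical tool) of the usual way the machinery above
+// is driven: options self-register as statics, then main() calls the parser.
+//
+//   static cl::opt<std::string> InputFile(cl::Positional, cl::desc("<input>"),
+//                                         cl::Required);
+//   static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));
+//
+//   int main(int argc, char **argv) {
+//     cl::ParseCommandLineOptions(argc, argv, "example tool\n");
+//     if (Verbose) outs() << "input: " << InputFile << "\n";
+//     return 0;
+//   }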
+
+//===----------------------------------------------------------------------===//
+// Option Base class implementation
+//
+
+bool Option::error(const Twine &Message, StringRef ArgName) {
+ if (ArgName.data() == 0) ArgName = ArgStr;
+ if (ArgName.empty())
+ errs() << HelpStr; // Be nice for positional arguments
+ else
+ errs() << ProgramName << ": for the -" << ArgName;
+
+ errs() << " option: " << Message << "\n";
+ return true;
+}
+
+bool Option::addOccurrence(unsigned pos, StringRef ArgName,
+ StringRef Value, bool MultiArg) {
+ if (!MultiArg)
+ NumOccurrences++; // Increment the number of times we have been seen
+
+ switch (getNumOccurrencesFlag()) {
+ case Optional:
+ if (NumOccurrences > 1)
+ return error("may only occur zero or one times!", ArgName);
+ break;
+ case Required:
+ if (NumOccurrences > 1)
+ return error("must occur exactly one time!", ArgName);
+ // Fall through
+ case OneOrMore:
+ case ZeroOrMore:
+ case ConsumeAfter: break;
+ default: return error("bad num occurrences flag value!");
+ }
+
+ return handleOccurrence(pos, ArgName, Value);
+}
+
+
+// getValueStr - Get the value description string, using "DefaultMsg" if nothing
+// has been specified yet.
+//
+static const char *getValueStr(const Option &O, const char *DefaultMsg) {
+ if (O.ValueStr[0] == 0) return DefaultMsg;
+ return O.ValueStr;
+}
+
+//===----------------------------------------------------------------------===//
+// cl::alias class implementation
+//
+
+// Return the width of the option tag for printing...
+size_t alias::getOptionWidth() const {
+ return std::strlen(ArgStr)+6;
+}
+
+// Print out the option for the alias.
+void alias::printOptionInfo(size_t GlobalWidth) const {
+ size_t L = std::strlen(ArgStr);
+ outs() << " -" << ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Parser Implementation code...
+//
+
+// basic_parser implementation
+//
+
+// Return the width of the option tag for printing...
+size_t basic_parser_impl::getOptionWidth(const Option &O) const {
+ size_t Len = std::strlen(O.ArgStr);
+ if (const char *ValName = getValueName())
+ Len += std::strlen(getValueStr(O, ValName))+3;
+
+ return Len + 6;
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void basic_parser_impl::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+
+ if (const char *ValName = getValueName())
+ outs() << "=<" << getValueStr(O, ValName) << '>';
+
+ outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
+}
+
+void basic_parser_impl::printOptionName(const Option &O,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+}
+
+
+// parser<bool> implementation
+//
+bool parser<bool>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, bool &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = true;
+ return false;
+ }
+
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = false;
+ return false;
+ }
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<boolOrDefault> implementation
+//
+bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, boolOrDefault &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = BOU_TRUE;
+ return false;
+ }
+ if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = BOU_FALSE;
+ return false;
+ }
+
+ return O.error("'" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+}
+
+// parser<int> implementation
+//
+bool parser<int>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, int &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for integer argument!");
+ return false;
+}
+
+// parser<unsigned> implementation
+//
+bool parser<unsigned>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned &Value) {
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+ return false;
+}
+
+// parser<double>/parser<float> implementation
+//
+static bool parseDouble(Option &O, StringRef Arg, double &Value) {
+ SmallString<32> TmpStr(Arg.begin(), Arg.end());
+ const char *ArgStart = TmpStr.c_str();
+ char *End;
+ Value = strtod(ArgStart, &End);
+ if (*End != 0)
+ return O.error("'" + Arg + "' value invalid for floating point argument!");
+ return false;
+}
+
+bool parser<double>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, double &Val) {
+ return parseDouble(O, Arg, Val);
+}
+
+bool parser<float>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, float &Val) {
+ double dVal;
+ if (parseDouble(O, Arg, dVal))
+ return true;
+ Val = (float)dVal;
+ return false;
+}
+
+
+
+// generic_parser_base implementation
+//
+
+// findOption - Return the option number corresponding to the specified
+// argument string. If the option is not found, getNumOptions() is returned.
+//
+unsigned generic_parser_base::findOption(const char *Name) {
+ unsigned e = getNumOptions();
+
+ for (unsigned i = 0; i != e; ++i) {
+ if (strcmp(getOption(i), Name) == 0)
+ return i;
+ }
+ return e;
+}
+
+
+// Return the width of the option tag for printing...
+size_t generic_parser_base::getOptionWidth(const Option &O) const {
+ if (O.hasArgStr()) {
+ size_t Size = std::strlen(O.ArgStr)+6;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ Size = std::max(Size, std::strlen(getOption(i))+8);
+ return Size;
+ } else {
+ size_t BaseSize = 0;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8);
+ return BaseSize;
+ }
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void generic_parser_base::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ if (O.hasArgStr()) {
+ size_t L = std::strlen(O.ArgStr);
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n';
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
+ outs() << " =" << getOption(i);
+ outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
+ }
+ } else {
+ if (O.HelpStr[0])
+ outs() << " " << O.HelpStr << '\n';
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t L = std::strlen(getOption(i));
+ outs() << " -" << getOption(i);
+ outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n';
+ }
+ }
+}
+
+static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
+
+// printGenericOptionDiff - Print the value of this option and its default.
+//
+// "Generic" options have each value mapped to a name.
+void generic_parser_base::
+printGenericOptionDiff(const Option &O, const GenericOptionValue &Value,
+ const GenericOptionValue &Default,
+ size_t GlobalWidth) const {
+ outs() << " -" << O.ArgStr;
+ outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+
+ unsigned NumOpts = getNumOptions();
+ for (unsigned i = 0; i != NumOpts; ++i) {
+ if (Value.compare(getOptionValue(i)))
+ continue;
+
+ outs() << "= " << getOption(i);
+ size_t L = std::strlen(getOption(i));
+ size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ for (unsigned j = 0; j != NumOpts; ++j) {
+ if (Default.compare(getOptionValue(j)))
+ continue;
+ outs() << getOption(j);
+ break;
+ }
+ outs() << ")\n";
+ return;
+ }
+ outs() << "= *unknown option value*\n";
+}
+
+// printOptionDiff - Specializations for printing basic value types.
+//
+#define PRINT_OPT_DIFF(T) \
+ void parser<T>:: \
+ printOptionDiff(const Option &O, T V, OptionValue<T> D, \
+ size_t GlobalWidth) const { \
+ printOptionName(O, GlobalWidth); \
+ std::string Str; \
+ { \
+ raw_string_ostream SS(Str); \
+ SS << V; \
+ } \
+ outs() << "= " << Str; \
+ size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;\
+ outs().indent(NumSpaces) << " (default: "; \
+ if (D.hasValue()) \
+ outs() << D.getValue(); \
+ else \
+ outs() << "*no default*"; \
+ outs() << ")\n"; \
+ } \
+
+PRINT_OPT_DIFF(bool)
+PRINT_OPT_DIFF(boolOrDefault)
+PRINT_OPT_DIFF(int)
+PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(double)
+PRINT_OPT_DIFF(float)
+PRINT_OPT_DIFF(char)
+
+void parser<std::string>::
+printOptionDiff(const Option &O, StringRef V, OptionValue<std::string> D,
+ size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= " << V;
+ size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0;
+ outs().indent(NumSpaces) << " (default: ";
+ if (D.hasValue())
+ outs() << D.getValue();
+ else
+ outs() << "*no default*";
+ outs() << ")\n";
+}
+
+// Print a placeholder for options that don't yet support printOptionDiff().
+void basic_parser_impl::
+printOptionNoValue(const Option &O, size_t GlobalWidth) const {
+ printOptionName(O, GlobalWidth);
+ outs() << "= *cannot print option value*\n";
+}
+
+//===----------------------------------------------------------------------===//
+// -help and -help-hidden option implementation
+//
+
+static int OptNameCompare(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, Option*> pair_ty;
+
+ return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+}
+
+// Copy Options into a vector so we can sort them as we like.
+static void
+sortOpts(StringMap<Option*> &OptMap,
+ SmallVectorImpl< std::pair<const char *, Option*> > &Opts,
+ bool ShowHidden) {
+ SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection.
+
+ for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
+ I != E; ++I) {
+ // Ignore really-hidden options.
+ if (I->second->getOptionHiddenFlag() == ReallyHidden)
+ continue;
+
+ // Unless showhidden is set, ignore hidden flags.
+ if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
+ continue;
+
+ // If we've already seen this option, don't add it to the list again.
+ if (!OptionSet.insert(I->second))
+ continue;
+
+ Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
+ I->second));
+ }
+
+ // Sort the options list alphabetically.
+ qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+}
+
+namespace {
+
+class HelpPrinter {
+ size_t MaxArgLen;
+ const Option *EmptyArg;
+ const bool ShowHidden;
+
+public:
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
+ EmptyArg = 0;
+ }
+
+ void operator=(bool Value) {
+ if (Value == false) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, ShowHidden);
+
+ if (ProgramOverview)
+ outs() << "OVERVIEW: " << ProgramOverview << "\n";
+
+ outs() << "USAGE: " << ProgramName << " [options]";
+
+ // Print out the positional options.
+ Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
+ if (!PositionalOpts.empty() &&
+ PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
+ CAOpt = PositionalOpts[0];
+
+ for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (PositionalOpts[i]->ArgStr[0])
+ outs() << " --" << PositionalOpts[i]->ArgStr;
+ outs() << " " << PositionalOpts[i]->HelpStr;
+ }
+
+ // Print the consume after option info if it exists...
+ if (CAOpt) outs() << " " << CAOpt->HelpStr;
+
+ outs() << "\n\n";
+
+ // Compute the maximum argument length...
+ MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ outs() << "OPTIONS:\n";
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionInfo(MaxArgLen);
+
+ // Print any extra help the user has declared.
+ for (std::vector<const char *>::iterator I = MoreHelp->begin(),
+ E = MoreHelp->end(); I != E; ++I)
+ outs() << *I;
+ MoreHelp->clear();
+
+ // Halt the program since help information was printed
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+// Define the two HelpPrinter instances that are used to print out help, or
+// help-hidden...
+//
+static HelpPrinter NormalPrinter(false);
+static HelpPrinter HiddenPrinter(true);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+ cl::location(NormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HHOp("help-hidden", cl::desc("Display all available options"),
+ cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+static cl::opt<bool>
+PrintOptions("print-options",
+ cl::desc("Print non-default options after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+PrintAllOptions("print-all-options",
+ cl::desc("Print all option values after command line parsing"),
+ cl::Hidden, cl::init(false));
+
+// Print the value of each option.
+void cl::PrintOptionValues() {
+ if (!PrintOptions && !PrintAllOptions) return;
+
+ // Get all the options.
+ SmallVector<Option*, 4> PositionalOpts;
+ SmallVector<Option*, 4> SinkOpts;
+ StringMap<Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ sortOpts(OptMap, Opts, /*ShowHidden*/true);
+
+ // Compute the maximum argument length...
+ size_t MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
+}
+
+static void (*OverrideVersionPrinter)() = 0;
+
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+ typedef std::pair<const char *, const Target*> pair_ty;
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
+}
+
+namespace {
+class VersionPrinter {
+public:
+ void print() {
+ raw_ostream &OS = outs();
+ OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+ << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+#ifdef LLVM_VERSION_INFO
+ OS << LLVM_VERSION_INFO;
+#endif
+ OS << "\n ";
+#ifndef __OPTIMIZE__
+ OS << "DEBUG build";
+#else
+ OS << "Optimized build";
+#endif
+#ifndef NDEBUG
+ OS << " with assertions";
+#endif
+ std::string CPU = sys::getHostCPUName();
+ if (CPU == "generic") CPU = "(unknown)";
+ OS << ".\n"
+#if defined(ENABLE_TIMESTAMPS) && ENABLE_TIMESTAMPS == 1
+ << " Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
+ << " Host: " << sys::getHostTriple() << '\n'
+ << " Host CPU: " << CPU << '\n'
+ << '\n'
+ << " Registered Targets:\n";
+
+ std::vector<std::pair<const char *, const Target*> > Targets;
+ size_t Width = 0;
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ Targets.push_back(std::make_pair(it->getName(), &*it));
+ Width = std::max(Width, strlen(Targets.back().first));
+ }
+ if (!Targets.empty())
+ qsort(&Targets[0], Targets.size(), sizeof(Targets[0]),
+ TargetArraySortFn);
+
+ for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+ OS << " " << Targets[i].first;
+ OS.indent(Width - strlen(Targets[i].first)) << " - "
+ << Targets[i].second->getShortDescription() << '\n';
+ }
+ if (Targets.empty())
+ OS << " (none)\n";
+ }
+ void operator=(bool OptionWasSpecified) {
+ if (!OptionWasSpecified) return;
+
+ if (OverrideVersionPrinter == 0) {
+ print();
+ exit(1);
+ }
+ (*OverrideVersionPrinter)();
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+
+// Define the --version option that prints out the LLVM version for the tool
+static VersionPrinter VersionPrinterInstance;
+
+static cl::opt<VersionPrinter, true, parser<bool> >
+VersOp("version", cl::desc("Display the version of this program"),
+ cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+
+// Utility function for printing the help message.
+void cl::PrintHelpMessage() {
+ // This looks weird, but it actually prints the help message. The
+ // NormalPrinter variable is a HelpPrinter and the help gets printed when
+ // its operator= is invoked. That's because the normal usage of the help
+ // printer is to be assigned true or false, depending on whether the -help
+ // option was given or not. Since we're circumventing that, we have to make
+ // it look like -help was given, so we assign true.
+ NormalPrinter = true;
+}
+
+/// Utility function for printing version number.
+void cl::PrintVersionMessage() {
+ VersionPrinterInstance.print();
+}
+
+void cl::SetVersionPrinter(void (*func)()) {
+ OverrideVersionPrinter = func;
+}
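+
+// Usage sketch (illustrative only): how a client tool might wire these hooks
+// up. The identifiers EmitLLVM, InputFile and PrintMyToolVersion below are
+// hypothetical examples, not defined anywhere in LLVM.
+#if 0
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EmitLLVM("emit-llvm", cl::desc("Emit LLVM IR instead of native code"),
+         cl::init(false));
+
+static cl::opt<std::string>
+InputFile(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+static void PrintMyToolVersion() {
+  outs() << "mytool version 1.0\n";
+}
+
+int main(int argc, char **argv) {
+  // Replace the default -version output with our own printer.
+  cl::SetVersionPrinter(PrintMyToolVersion);
+  // -help, -help-hidden, -version and -print-options are all handled here.
+  cl::ParseCommandLineOptions(argc, argv, "mytool - example driver\n");
+
+  if (EmitLLVM) {
+    // ... emit IR for InputFile ...
+  }
+  return 0;
+}
+#endif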
diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp
new file mode 100644
index 000000000000..81382d08dc23
--- /dev/null
+++ b/contrib/llvm/lib/Support/ConstantRange.cpp
@@ -0,0 +1,702 @@
+//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Represent a range of possible values that an integral value may take on when
+// the program is run. This keeps track of a lower and upper bound for the
+// constant, which MAY wrap around the end of the numeric range. To do this, it
+// keeps track of a [lower, upper) bound, which specifies an interval just like
+// STL iterators. When used with boolean values, the following are important
+// ranges (other integral ranges use min/max values for special range values):
+//
+// [F, F) = {} = Empty set
+// [T, F) = {T}
+// [F, T) = {F}
+// [T, T) = {F, T} = Full set
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Instructions.h"
+using namespace llvm;
+
+/// Initialize a full (the default) or empty set for the specified type.
+///
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
+ if (Full)
+ Lower = Upper = APInt::getMaxValue(BitWidth);
+ else
+ Lower = Upper = APInt::getMinValue(BitWidth);
+}
+
+/// Initialize a range to hold the single specified value.
+///
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {}
+
+ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
+ Lower(L), Upper(U) {
+ assert(L.getBitWidth() == U.getBitWidth() &&
+ "ConstantRange with unequal bit widths");
+ assert((L != U || (L.isMaxValue() || L.isMinValue())) &&
+ "Lower == Upper, but they aren't min or max value!");
+}
+
+ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
+ const ConstantRange &CR) {
+ if (CR.isEmptySet())
+ return CR;
+
+ uint32_t W = CR.getBitWidth();
+ switch (Pred) {
+ default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
+ case ICmpInst::ICMP_EQ:
+ return CR;
+ case ICmpInst::ICMP_NE:
+ if (CR.isSingleElement())
+ return ConstantRange(CR.getUpper(), CR.getLower());
+ return ConstantRange(W);
+ case ICmpInst::ICMP_ULT: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMinValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getMinValue(W), UMax);
+ }
+ case ICmpInst::ICMP_SLT: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMinSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax);
+ }
+ case ICmpInst::ICMP_ULE: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMaxValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getMinValue(W), UMax + 1);
+ }
+ case ICmpInst::ICMP_SLE: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMaxSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+ }
+ case ICmpInst::ICMP_UGT: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMaxValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(UMin + 1, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGT: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMaxSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+ }
+ case ICmpInst::ICMP_UGE: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMinValue())
+ return ConstantRange(W);
+ return ConstantRange(UMin, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGE: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMinSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(SMin, APInt::getSignedMinValue(W));
+ }
+ }
+}
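+
+// Usage sketch (illustrative only): the region of i8 values X for which
+// "icmp ult X, 16" can evaluate to true is [0, 16). This assumes
+// makeICmpRegion is declared as a static member in ConstantRange.h.
+#if 0
+static ConstantRange MakeULT16Region() {
+  ConstantRange Sixteen(APInt(8, 16));   // the single value {16}
+  return ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, Sixteen);
+  // Result: [0, 16) -- every unsigned i8 value strictly below 16.
+}
+#endif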
+
+/// isFullSet - Return true if this set contains all of the elements possible
+/// for this data-type.
+bool ConstantRange::isFullSet() const {
+ return Lower == Upper && Lower.isMaxValue();
+}
+
+/// isEmptySet - Return true if this set contains no members.
+///
+bool ConstantRange::isEmptySet() const {
+ return Lower == Upper && Lower.isMinValue();
+}
+
+/// isWrappedSet - Return true if this set wraps around the top of the range,
+/// for example: [100, 8)
+///
+bool ConstantRange::isWrappedSet() const {
+ return Lower.ugt(Upper);
+}
+
+/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+/// its bitwidth, for example: i8 [120, 140).
+///
+bool ConstantRange::isSignWrappedSet() const {
+ return contains(APInt::getSignedMaxValue(getBitWidth())) &&
+ contains(APInt::getSignedMinValue(getBitWidth()));
+}
+
+/// getSetSize - Return the number of elements in this set.
+///
+APInt ConstantRange::getSetSize() const {
+ if (isEmptySet())
+ return APInt(getBitWidth(), 0);
+ if (getBitWidth() == 1) {
+ if (Lower != Upper) // One of T or F in the set...
+ return APInt(2, 1);
+ return APInt(2, 2); // Must be full set...
+ }
+
+ // Simply subtract the bounds...
+ return Upper - Lower;
+}
+
+/// getUnsignedMax - Return the largest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMax() const {
+ if (isFullSet() || isWrappedSet())
+ return APInt::getMaxValue(getBitWidth());
+ else
+ return getUpper() - 1;
+}
+
+/// getUnsignedMin - Return the smallest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMin() const {
+ if (isFullSet() || (isWrappedSet() && getUpper() != 0))
+ return APInt::getMinValue(getBitWidth());
+ else
+ return getLower();
+}
+
+/// getSignedMax - Return the largest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMax() const {
+ APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getUpper() - 1;
+ else
+ return SignedMax;
+ } else {
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ else
+ return getUpper() - 1;
+ }
+}
+
+/// getSignedMin - Return the smallest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMin() const {
+ APInt SignedMin(APInt::getSignedMinValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getLower();
+ else
+ return SignedMin;
+ } else {
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
+ return SignedMin;
+ else
+ return getLower();
+ } else {
+ return getLower();
+ }
+ }
+}
+
+/// contains - Return true if the specified value is in the set.
+///
+bool ConstantRange::contains(const APInt &V) const {
+ if (Lower == Upper)
+ return isFullSet();
+
+ if (!isWrappedSet())
+ return Lower.ule(V) && V.ult(Upper);
+ else
+ return Lower.ule(V) || V.ult(Upper);
+}
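+
+// Usage sketch (illustrative only): membership in a wrapped range. On i8,
+// [250, 5) holds 250..255 together with 0..4.
+#if 0
+static void WrappedContainsExample() {
+  ConstantRange CR(APInt(8, 250), APInt(8, 5));
+  bool A = CR.contains(APInt(8, 252));   // true
+  bool B = CR.contains(APInt(8, 3));     // true
+  bool C = CR.contains(APInt(8, 100));   // false
+  (void)A; (void)B; (void)C;
+}
+#endif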
+
+/// contains - Return true if the argument is a subset of this range.
+/// Two equal sets contain each other. The empty set is contained by all
+/// other sets.
+///
+bool ConstantRange::contains(const ConstantRange &Other) const {
+ if (isFullSet() || Other.isEmptySet()) return true;
+ if (isEmptySet() || Other.isFullSet()) return false;
+
+ if (!isWrappedSet()) {
+ if (Other.isWrappedSet())
+ return false;
+
+ return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
+ }
+
+ if (!Other.isWrappedSet())
+ return Other.getUpper().ule(Upper) ||
+ Lower.ule(Other.getLower());
+
+ return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower());
+}
+
+/// subtract - Subtract the specified constant from the endpoints of this
+/// constant range.
+ConstantRange ConstantRange::subtract(const APInt &Val) const {
+ assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
+ // If the set is empty or full, don't modify the endpoints.
+ if (Lower == Upper)
+ return *this;
+ return ConstantRange(Lower - Val, Upper - Val);
+}
+
+/// intersectWith - Return the range that results from the intersection of this
+/// range with another range. The resultant range is guaranteed to include all
+/// elements contained in both input ranges, and to have the smallest possible
+/// set size that does so. Because there may be two intersections with the
+/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A).
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ // Handle common cases.
+ if ( isEmptySet() || CR.isFullSet()) return *this;
+ if (CR.isEmptySet() || isFullSet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet())
+ return CR.intersectWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (Lower.ult(CR.Lower)) {
+ if (Upper.ule(CR.Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ if (Upper.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ return CR;
+ } else {
+ if (Upper.ult(CR.Upper))
+ return *this;
+
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
+
+ return ConstantRange(getBitWidth(), false);
+ }
+ }
+
+ if (isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Lower.ult(Upper)) {
+ if (CR.Upper.ult(Upper))
+ return CR;
+
+ if (CR.Upper.ult(Lower))
+ return ConstantRange(CR.Lower, Upper);
+
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ } else if (CR.Lower.ult(Lower)) {
+ if (CR.Upper.ule(Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ return ConstantRange(Lower, CR.Upper);
+ }
+ return CR;
+ }
+
+ if (CR.Upper.ult(Upper)) {
+ if (CR.Lower.ult(Upper)) {
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ }
+
+ if (CR.Lower.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+
+ return CR;
+ } else if (CR.Upper.ult(Lower)) {
+ if (CR.Lower.ult(Lower))
+ return *this;
+
+ return ConstantRange(CR.Lower, Upper);
+ }
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+}
+
+
+/// unionWith - Return the range that results from the union of this range with
+/// another range. The resultant range is guaranteed to include the elements of
+/// both sets, but may contain more. For example, [3, 9) union [12,15) is
+/// [3, 15), which includes 9, 10, and 11, which were not included in either
+/// set before.
+///
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ if ( isFullSet() || CR.isEmptySet()) return *this;
+ if (CR.isFullSet() || isEmptySet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
+ // If the two ranges are disjoint, find the smaller gap and bridge it.
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ APInt L = Lower, U = Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+ if ((CR.Upper - 1).ugt(U - 1))
+ U = CR.Upper;
+
+ if (L == 0 && U == 0)
+ return ConstantRange(getBitWidth());
+
+ return ConstantRange(L, U);
+ }
+
+ if (!CR.isWrappedSet()) {
+ // ------U L----- and ------U L----- : this
+ // L--U L--U : CR
+ if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
+ return *this;
+
+ // ------U L----- : this
+ // L---------U : CR
+ if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ // ----U L---- : this
+ // L---U : CR
+ // <d1> <d2>
+ if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2))
+ return ConstantRange(Lower, CR.Upper);
+ else
+ return ConstantRange(CR.Lower, Upper);
+ }
+
+ // ----U L----- : this
+ // L----U : CR
+ if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ // ------U L---- : this
+ // L-----U : CR
+ if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+ }
+
+ assert(isWrappedSet() && CR.isWrappedSet() &&
+ "ConstantRange::unionWith missed wrapped union unwrapped case");
+
+ // ------U L---- and ------U L---- : this
+ // -U L----------- and ------------U L : CR
+ if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
+ return ConstantRange(getBitWidth());
+
+ APInt L = Lower, U = Upper;
+ if (CR.Upper.ugt(U))
+ U = CR.Upper;
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+
+ return ConstantRange(L, U);
+}
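+
+// Usage sketch (illustrative only): unionWith may over-approximate exactly as
+// described above -- on i32, [3, 9) union [12, 15) yields [3, 15), which also
+// contains 9, 10 and 11.
+#if 0
+static void UnionOverApproximates() {
+  ConstantRange A(APInt(32, 3), APInt(32, 9));
+  ConstantRange B(APInt(32, 12), APInt(32, 15));
+  ConstantRange U = A.unionWith(B);       // [3, 15)
+  bool Extra = U.contains(APInt(32, 10)); // true, though 10 was in neither input
+  (void)Extra;
+}
+#endif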
+
+/// zeroExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// zero extended.
+ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isWrappedSet())
+ // Change into [0, 1 << src bit width)
+ return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize));
+
+ return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
+}
+
+/// signExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// sign extended.
+ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet() || isSignWrappedSet()) {
+ return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
+ APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
+ }
+
+ return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
+}
+
+/// truncate - Return a new range in the specified integer type, which must be
+/// strictly smaller than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// truncated to the specified type.
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize > DstTySize && "Not a value truncation");
+ APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
+ if (isFullSet() || getSetSize().ugt(Size))
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
+
+ return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
+}
+
+/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is zero extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return zeroExtend(DstTySize);
+ else
+ return *this;
+}
+
+/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is sign extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return signExtend(DstTySize);
+ else
+ return *this;
+}
+
+ConstantRange
+ConstantRange::add(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() + Other.getLower();
+ APInt NewUpper = getUpper() + Other.getUpper() - 1;
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
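+
+// Usage sketch (illustrative only): add() sums the bounds, and conservatively
+// degrades to the full set once the sums can cover every value. On i8,
+// [100, 120) + [50, 60) is [150, 179), while [0, 200) + [0, 100) spans more
+// than 256 possible sums and so becomes the full set.
+#if 0
+static void AddExamples() {
+  ConstantRange A(APInt(8, 100), APInt(8, 120));
+  ConstantRange B(APInt(8, 50),  APInt(8, 60));
+  ConstantRange S = A.add(B);            // [150, 179)
+
+  ConstantRange C(APInt(8, 0), APInt(8, 200));
+  ConstantRange D(APInt(8, 0), APInt(8, 100));
+  ConstantRange F = C.add(D);            // full set
+  (void)S; (void)F;
+}
+#endif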
+
+ConstantRange
+ConstantRange::sub(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() - Other.getUpper() + 1;
+ APInt NewUpper = getUpper() - Other.getLower();
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
+ConstantRange::multiply(const ConstantRange &Other) const {
+ // TODO: If either operand is a single element and the multiply is known to
+ // be non-wrapping, round the result min and max value to the appropriate
+ // multiple of that element. If wrapping is possible, at least adjust the
+ // range according to the greatest power-of-two factor of the single element.
+
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
+ APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
+ APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
+ APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2);
+
+ ConstantRange Result_zext = ConstantRange(this_min * Other_min,
+ this_max * Other_max + 1);
+ return Result_zext.truncate(getBitWidth());
+}
+
+ConstantRange
+ConstantRange::smax(const ConstantRange &Other) const {
+ // X smax Y is: range(smax(X_smin, Y_smin),
+ // smax(X_smax, Y_smax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
+ APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::umax(const ConstantRange &Other) const {
+ // X umax Y is: range(umax(X_umin, Y_umin),
+ // umax(X_umax, Y_umax))
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
+ if (NewU == NewL)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(NewL, NewU);
+}
+
+ConstantRange
+ConstantRange::udiv(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (RHS.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
+
+ APInt RHS_umin = RHS.getUnsignedMin();
+ if (RHS_umin == 0) {
+ // We want the lowest value in RHS excluding zero. Usually that would be 1
+ // except for a range in the form of [X, 1) in which case it would be X.
+ if (RHS.getUpper() == 1)
+ RHS_umin = RHS.getLower();
+ else
+ RHS_umin = APInt(getBitWidth(), 1);
+ }
+
+ APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+
+ // Lower and Upper can collide here, e.g. if the LHS is full and the RHS is
+ // a wrapped interval containing 1; in that case the result is the full set.
+ if (Lower == Upper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(Lower, Upper);
+}
+
+ConstantRange
+ConstantRange::binaryAnd(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
+ if (umin.isAllOnesValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+}
+
+ConstantRange
+ConstantRange::binaryOr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ if (umax.isMinValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+}
+
+ConstantRange
+ConstantRange::shl(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt min = getUnsignedMin().shl(Other.getUnsignedMin());
+ APInt max = getUnsignedMax().shl(Other.getUnsignedMax());
+
+ // If the largest shift amount cannot shift any set bits out of the largest
+ // value, there's no overflow.
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
+ if (Zeros.ugt(Other.getUnsignedMax()))
+ return ConstantRange(min, max + 1);
+
+ // FIXME: implement the other tricky cases
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+}
+
+ConstantRange
+ConstantRange::lshr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt max = getUnsignedMax().lshr(Other.getUnsignedMin());
+ APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
+ if (min == max + 1)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return ConstantRange(min, max + 1);
+}
+
+ConstantRange ConstantRange::inverse() const {
+ if (isFullSet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ } else if (isEmptySet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ }
+ return ConstantRange(Upper, Lower);
+}
+
+/// print - Print out the bounds to a stream...
+///
+void ConstantRange::print(raw_ostream &OS) const {
+ if (isFullSet())
+ OS << "full-set";
+ else if (isEmptySet())
+ OS << "empty-set";
+ else
+ OS << "[" << Lower << "," << Upper << ")";
+}
+
+/// dump - Allow printing from a debugger easily...
+///
+void ConstantRange::dump() const {
+ print(dbgs());
+}
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 000000000000..899c3890d78a
--- /dev/null
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,281 @@
+//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
+#include <setjmp.h>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+ CrashRecoveryContext *CRC;
+ std::string Backtrace;
+ ::jmp_buf JumpBuffer;
+ volatile unsigned Failed : 1;
+
+public:
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+ Failed(false) {
+ CurrentContext.set(this);
+ }
+ ~CrashRecoveryContextImpl() {
+ CurrentContext.erase();
+ }
+
+ void HandleCrash() {
+ // Eliminate the current context entry, to avoid re-entering in case the
+ // cleanup code crashes.
+ CurrentContext.erase();
+
+ assert(!Failed && "Crash recovery context already failed!");
+ Failed = true;
+
+ // FIXME: Stash the backtrace.
+
+ // Jump back to the RunSafely we were called under.
+ longjmp(JumpBuffer, 1);
+ }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+static sys::ThreadLocal<const CrashRecoveryContextCleanup>
+ tlIsRecoveringFromCrash;
+
+CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+ // Reclaim registered resources.
+ CrashRecoveryContextCleanup *i = head;
+ tlIsRecoveringFromCrash.set(head);
+ while (i) {
+ CrashRecoveryContextCleanup *tmp = i;
+ i = tmp->next;
+ tmp->cleanupFired = true;
+ tmp->recoverResources();
+ delete tmp;
+ }
+ tlIsRecoveringFromCrash.erase();
+
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ delete CRCI;
+}
+
+bool CrashRecoveryContext::isRecoveringFromCrash() {
+ return tlIsRecoveringFromCrash.get() != 0;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+ if (!gCrashRecoveryEnabled)
+ return 0;
+
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ if (!CRCI)
+ return 0;
+
+ return CRCI->CRC;
+}
+
+void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
+{
+ if (!cleanup)
+ return;
+ if (head)
+ head->prev = cleanup;
+ cleanup->next = head;
+ head = cleanup;
+}
+
+void
+CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
+ if (!cleanup)
+ return;
+ if (cleanup == head) {
+ head = cleanup->next;
+ if (head)
+ head->prev = 0;
+ }
+ else {
+ cleanup->prev->next = cleanup->next;
+ if (cleanup->next)
+ cleanup->next->prev = cleanup->prev;
+ }
+ delete cleanup;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // We didn't find a crash recovery context -- this means either we got a
+ // signal on a thread we didn't expect it on, the application got a signal
+ // outside of a crash recovery context, or something else went horribly
+ // wrong.
+ //
+ // Disable crash recovery and raise the signal again. The assumption here is
+ // that the enclosing application will terminate soon, and we won't want to
+ // attempt crash recovery again.
+ //
+ // This call of Disable isn't thread safe, but it doesn't actually matter.
+ CrashRecoveryContext::Disable();
+ raise(Signal);
+
+ // The signal will be thrown once the signal mask is restored.
+ return;
+ }
+
+ // Unblock the signal we received.
+ sigset_t SigMask;
+ sigemptyset(&SigMask);
+ sigaddset(&SigMask, Signal);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (CRCI)
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // Setup the signal handler.
+ struct sigaction Handler;
+ Handler.sa_handler = CrashRecoverySignalHandler;
+ Handler.sa_flags = 0;
+ sigemptyset(&Handler.sa_mask);
+
+ for (unsigned i = 0; i != NumSignals; ++i) {
+ sigaction(Signals[i], &Handler, &PrevActions[i]);
+ }
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ // Restore the previous signal handlers.
+ for (unsigned i = 0; i != NumSignals; ++i)
+ sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ Fn(UserData);
+ return true;
+}
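+
+// Usage sketch (illustrative only): a host application guarding a crashy
+// library entry point. ParseTheFile and ParseSafely are hypothetical names.
+#if 0
+static void ParseTheFile(void *Opaque) {
+  const char *FileName = static_cast<const char *>(Opaque);
+  // ... work that might crash ...
+  (void)FileName;
+}
+
+static bool ParseSafely(const char *FileName) {
+  CrashRecoveryContext::Enable();        // install the signal handlers once
+  CrashRecoveryContext CRC;
+  if (!CRC.RunSafely(ParseTheFile, const_cast<char *>(FileName))) {
+    // The callback crashed, but the process is still alive; report failure
+    // instead of dying.
+    return false;
+  }
+  return true;
+}
+#endif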
+
+void CrashRecoveryContext::HandleCrash() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ assert(CRCI && "Crash recovery context never initialized!");
+ CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+ CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+ assert(CRC && "Crash recovery context never initialized!");
+ assert(CRC->Failed && "No crash was detected!");
+ return CRC->Backtrace;
+}
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+ void (*UserFn)(void*);
+ void *UserData;
+ CrashRecoveryContext *CRC;
+ bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+ RunSafelyOnThreadInfo *Info =
+ reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+ Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+ llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+ return Info.Result;
+}
diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 000000000000..814566494d30
--- /dev/null
+++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,357 @@
+//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// The algorithm we use attempts to exploit the dependency information by
+// minimizing top-down. We start by constructing an initial root set R, and
+// then iteratively:
+//
+// 1. Minimize the set R using the test predicate:
+// P'(S) = P(S union pred*(S))
+//
+// 2. Extend R to R' = R union pred(R).
+//
+// until a fixed point is reached.
+//
+// The idea is that we want to quickly prune entire portions of the graph, so we
+// try to find high-level nodes that can be eliminated with all of their
+// dependents.
+//
+// FIXME: The current algorithm doesn't actually provide a strong guarantee
+// about the minimality of the result. The problem is that after adding nodes to
+// the required set, we no longer consider them for elimination. For strictly
+// well formed predicates, this doesn't happen, but it commonly occurs in
+// practice when there are unmodelled dependencies. I believe we can resolve
+// this by allowing the required set to be minimized as well, but need more test
+// cases first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class DAGDeltaAlgorithmImpl {
+ friend class DeltaActiveSetHelper;
+
+public:
+ typedef DAGDeltaAlgorithm::change_ty change_ty;
+ typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
+ typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
+ typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+
+private:
+ typedef std::vector<change_ty>::iterator pred_iterator_ty;
+ typedef std::vector<change_ty>::iterator succ_iterator_ty;
+ typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
+ typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+
+ DAGDeltaAlgorithm &DDA;
+
+ const changeset_ty &Changes;
+ const std::vector<edge_ty> &Dependencies;
+
+ std::vector<change_ty> Roots;
+
+ /// Cache of failed test results. Successful test results are never cached
+ /// since we always reduce following a success. We maintain an independent
+ /// cache from that used by the individual delta passes because we may get
+ /// hits across multiple individual delta invocations.
+ mutable std::set<changeset_ty> FailedTestsCache;
+
+ // FIXME: Gross.
+ std::map<change_ty, std::vector<change_ty> > Predecessors;
+ std::map<change_ty, std::vector<change_ty> > Successors;
+
+ std::map<change_ty, std::set<change_ty> > PredClosure;
+ std::map<change_ty, std::set<change_ty> > SuccClosure;
+
+private:
+ pred_iterator_ty pred_begin(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].begin();
+ }
+ pred_iterator_ty pred_end(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].end();
+ }
+
+ pred_closure_iterator_ty pred_closure_begin(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].begin();
+ }
+ pred_closure_iterator_ty pred_closure_end(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].end();
+ }
+
+ succ_iterator_ty succ_begin(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].begin();
+ }
+ succ_iterator_ty succ_end(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].end();
+ }
+
+ succ_closure_iterator_ty succ_closure_begin(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].begin();
+ }
+ succ_closure_iterator_ty succ_closure_end(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].end();
+ }
+
+ void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ const changeset_ty &Required) {
+ DDA.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+ bool ExecuteOneTest(const changeset_ty &S) {
+ // Check dependencies invariant.
+ DEBUG({
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it)
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2)
+ assert(S.count(*it2) && "Attempt to run invalid changeset!");
+ });
+
+ return DDA.ExecuteOneTest(S);
+ }
+
+public:
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty> &_Dependencies);
+
+ changeset_ty Run();
+
+ /// GetTestResult - Get the test result for the active set \arg Changes with
+ /// \arg Required changes from the cache, executing the test if necessary.
+ ///
+ /// \param Changes - The set of active changes being minimized, which should
+ /// have their pred closure included in the test.
+ /// \param Required - The set of changes which have previously been
+ /// established to be required.
+ /// \return - The test result.
+ bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required);
+};
+
+/// Helper object for minimizing an active set of changes.
+class DeltaActiveSetHelper : public DeltaAlgorithm {
+ DAGDeltaAlgorithmImpl &DDAI;
+
+ const changeset_ty &Required;
+
+protected:
+ /// UpdatedSearchState - Callback used when the search state changes.
+ virtual void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ DDAI.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ virtual bool ExecuteOneTest(const changeset_ty &S) {
+ return DDAI.GetTestResult(S, Required);
+ }
+
+public:
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
+ const changeset_ty &_Required)
+ : DDAI(_DDAI), Required(_Required) {}
+};
+
+}
+
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty>
+ &_Dependencies)
+ : DDA(_DDA),
+ Changes(_Changes),
+ Dependencies(_Dependencies)
+{
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ }
+ for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
+ ie = Dependencies.end(); it != ie; ++it) {
+ Predecessors[it->second].push_back(it->first);
+ Successors[it->first].push_back(it->second);
+ }
+
+ // Compute the roots.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ if (succ_begin(*it) == succ_end(*it))
+ Roots.push_back(*it);
+
+ // Pre-compute the closure of the successor relation.
+ std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ change_ty Change = Worklist.back();
+ Worklist.pop_back();
+
+ std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
+ for (pred_iterator_ty it = pred_begin(Change),
+ ie = pred_end(Change); it != ie; ++it) {
+ SuccClosure[*it].insert(Change);
+ SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
+ Worklist.push_back(*it);
+ }
+ }
+
+ // Invert to form the predecessor closure map.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
+ PredClosure[*it2].insert(*it);
+
+ // Dump useful debug info.
+ DEBUG({
+ llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
+ llvm::errs() << "Changes: [";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ if (it != Changes.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+
+ if (succ_begin(*it) != succ_end(*it)) {
+ llvm::errs() << "(";
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << "->" << *it2;
+ }
+ llvm::errs() << ")";
+ }
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Roots: [";
+ for (std::vector<change_ty>::const_iterator it = Roots.begin(),
+ ie = Roots.end(); it != ie; ++it) {
+ if (it != Roots.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Predecessor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
+ ie2 = pred_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != pred_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "Successor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "\n\n";
+ });
+}
+
+bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
+ const changeset_ty &Required) {
+ changeset_ty Extended(Required);
+ Extended.insert(Changes.begin(), Changes.end());
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+
+ if (FailedTestsCache.count(Extended))
+ return false;
+
+ bool Result = ExecuteOneTest(Extended);
+ if (!Result)
+ FailedTestsCache.insert(Extended);
+
+ return Result;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithmImpl::Run() {
+ // The current set of changes we are minimizing, starting at the roots.
+ changeset_ty CurrentSet(Roots.begin(), Roots.end());
+
+ // The set of required changes.
+ changeset_ty Required;
+
+ // Iterate until the active set of changes is empty. Convergence is guaranteed
+ // assuming input was a DAG.
+ //
+ // Invariant: CurrentSet intersect Required == {}
+ // Invariant: Required == (Required union succ*(Required))
+ while (!CurrentSet.empty()) {
+ DEBUG({
+ llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, "
+ << Required.size() << " required changes\n";
+ });
+
+ // Minimize the current set of changes.
+ DeltaActiveSetHelper Helper(*this, Required);
+ changeset_ty CurrentMinSet = Helper.Run(CurrentSet);
+
+ // Update the set of required changes. Since
+ // CurrentMinSet subset CurrentSet
+ // and after the last iteration,
+ // succ(CurrentSet) subset Required
+ // then
+ // succ(CurrentMinSet) subset Required
+ // and our invariant on Required is maintained.
+ Required.insert(CurrentMinSet.begin(), CurrentMinSet.end());
+
+ // Replace the current set with the predecessors of the minimized set of
+ // active changes.
+ CurrentSet.clear();
+ for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
+ ie = CurrentMinSet.end(); it != ie; ++it)
+ CurrentSet.insert(pred_begin(*it), pred_end(*it));
+
+ // FIXME: We could enforce CurrentSet intersect Required == {} here if we
+ // wanted to protect against cyclic graphs.
+ }
+
+ return Required;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies) {
+ return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run();
+}
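+
+// Usage sketch (illustrative only): a client derives from DAGDeltaAlgorithm
+// and supplies the test predicate. With the edge convention used above (an
+// edge (A, B) makes A a predecessor of B, so any test containing B also pulls
+// in A), the following reduces {0, 1, 2, 3} to {0, 3} when the failure needs
+// only change 3 plus its predecessor 0.
+#if 0
+class MyReducer : public DAGDeltaAlgorithm {
+public:
+  virtual bool ExecuteOneTest(const changeset_ty &Changes) {
+    // Stand-in predicate: "the bug still reproduces" whenever changes 0 and
+    // 3 both survive in the rebuilt test case.
+    return Changes.count(0) && Changes.count(3);
+  }
+};
+
+static DAGDeltaAlgorithm::changeset_ty ReduceExample() {
+  DAGDeltaAlgorithm::changeset_ty Changes;
+  for (unsigned i = 0; i != 4; ++i)
+    Changes.insert(i);
+
+  std::vector<DAGDeltaAlgorithm::edge_ty> Deps;
+  Deps.push_back(std::make_pair(0u, 3u)); // 0 is a predecessor of 3
+  Deps.push_back(std::make_pair(1u, 2u)); // 1 is a predecessor of 2
+
+  MyReducer R;
+  return R.Run(Changes, Deps);            // yields {0, 3}
+}
+#endif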
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
new file mode 100644
index 000000000000..9fdb12ecfdcb
--- /dev/null
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -0,0 +1,134 @@
+//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also define the DEBUG_TYPE macro to "foo" to specify
+// that your debug code belongs to class "foo". Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling in release mode, the -debug-* options and all code in DEBUG()
+// statements disappear, so they do not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/Signals.h"
+
+using namespace llvm;
+
+// All Debug.h functionality is a no-op in NDEBUG mode.
+#ifndef NDEBUG
+bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
+
+// -debug - Command line option to enable the DEBUG statements in the passes.
+// This flag may only be enabled in debug builds.
+static cl::opt<bool, true>
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+ cl::location(DebugFlag));
+
+// -debug-buffer-size - Buffer the last N characters of debug output
+// until program termination.
+static cl::opt<unsigned>
+DebugBufferSize("debug-buffer-size",
+ cl::desc("Buffer the last N characters of debug output"
+ "until program termination. "
+ "[default 0 -- immediate print-out]"),
+ cl::Hidden,
+ cl::init(0));
+
+static std::string CurrentDebugType;
+
+namespace {
+
+struct DebugOnlyOpt {
+ void operator=(const std::string &Val) const {
+ DebugFlag |= !Val.empty();
+ CurrentDebugType = Val;
+ }
+};
+
+}
+
+static DebugOnlyOpt DebugOnlyOptLoc;
+
+static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+ cl::Hidden, cl::value_desc("debug string"),
+ cl::location(DebugOnlyOptLoc), cl::ValueRequired);
+
+// Signal handlers - dump debug output on termination.
+static void debug_user_sig_handler(void *Cookie) {
+ // This is a bit sneaky. Since this is under #ifndef NDEBUG, we
+ // know that debug mode is enabled and dbgs() really is a
+ // circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
+ // errs() but this will never be invoked.
+ llvm::circular_raw_ostream *dbgout =
+ static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs());
+ dbgout->flushBufferWithBanner();
+}
+
+// isCurrentDebugType - Return true if the specified string is the debug type
+// specified on the command line, or if none was specified on the command line
+// with the -debug-only=X option.
+//
+bool llvm::isCurrentDebugType(const char *DebugType) {
+ return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+}
+
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void llvm::SetCurrentDebugType(const char *Type) {
+ CurrentDebugType = Type;
+}
+
+/// dbgs - Return a circular-buffered debug stream.
+raw_ostream &llvm::dbgs() {
+ // Do one-time initialization in a thread-safe way.
+ static struct dbgstream {
+ circular_raw_ostream strm;
+
+ dbgstream() :
+ strm(errs(), "*** Debug Log Output ***\n",
+ (!EnableDebugBuffering || !DebugFlag) ? 0 : DebugBufferSize) {
+ if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0)
+ // TODO: Add a handler for SIGUSR1-type signals so the user can
+ // force a debug dump.
+ sys::AddSignalHandler(&debug_user_sig_handler, 0);
+ // Otherwise we've already set the debug stream buffer size to
+ // zero, disabling buffering so it will output directly to errs().
+ }
+ } thestrm;
+
+ return thestrm.strm;
+}
+
+#else
+// Avoid "has no symbols" warning.
+namespace llvm {
+ /// dbgs - Return errs().
+ raw_ostream &dbgs() {
+ return errs();
+ }
+}
+
+#endif
+
+/// EnableDebugBuffering - Turn on signal handler installation.
+///
+bool llvm::EnableDebugBuffering = false;
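+
+// Usage sketch (illustrative only): how a client emits output gated by -debug
+// and -debug-only. "mypass" is a hypothetical debug type.
+#if 0
+#define DEBUG_TYPE "mypass"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+static void runOnSomething(unsigned Count) {
+  // Printed only in builds with assertions, and only when -debug (or
+  // -debug-only=mypass) is given on the command line.
+  DEBUG(llvm::dbgs() << "mypass: visiting " << Count << " items\n");
+}
+#endif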
diff --git a/contrib/llvm/lib/Support/DeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 000000000000..9e52874de832
--- /dev/null
+++ b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+ if (FailedTestsCache.count(Changes))
+ return false;
+
+ bool Result = ExecuteOneTest(Changes);
+ if (!Result)
+ FailedTestsCache.insert(Changes);
+
+ return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+ // FIXME: Allow clients to provide heuristics for improved splitting.
+
+ // FIXME: This is really slow.
+ changeset_ty LHS, RHS;
+ unsigned idx = 0, N = S.size() / 2;
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it, ++idx)
+ ((idx < N) ? LHS : RHS).insert(*it);
+ if (!LHS.empty())
+ Res.push_back(LHS);
+ if (!RHS.empty())
+ Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ // Invariant: union(Res) == Changes
+ UpdatedSearchState(Changes, Sets);
+
+ // If there is nothing left we can remove, we are done.
+ if (Sets.size() <= 1)
+ return Changes;
+
+ // Look for a passing subset.
+ changeset_ty Res;
+ if (Search(Changes, Sets, Res))
+ return Res;
+
+ // Otherwise, partition the sets if possible; if not we are done.
+ changesetlist_ty SplitSets;
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it)
+ Split(*it, SplitSets);
+ if (SplitSets.size() == Sets.size())
+ return Changes;
+
+ return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ changeset_ty &Res) {
+ // FIXME: Parallelize.
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it) {
+ // If the test passes on this subset alone, recurse.
+ if (GetTestResult(*it)) {
+ changesetlist_ty Sets;
+ Split(*it, Sets);
+ Res = Delta(*it, Sets);
+ return true;
+ }
+
+ // Otherwise, if we have more than two sets, see if test passes on the
+ // complement.
+ if (Sets.size() > 2) {
+ // FIXME: This is really slow.
+ changeset_ty Complement;
+ std::set_difference(
+ Changes.begin(), Changes.end(), it->begin(), it->end(),
+ std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+ if (GetTestResult(Complement)) {
+ changesetlist_ty ComplementSets;
+ ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+ ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+ Res = Delta(Complement, ComplementSets);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+ // Check empty set first to quickly find poor test functions.
+ if (GetTestResult(changeset_ty()))
+ return changeset_ty();
+
+ // Otherwise run the real delta algorithm.
+ changesetlist_ty Sets;
+ Split(Changes, Sets);
+
+ return Delta(Changes, Sets);
+}
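+
+// Usage sketch (illustrative only): minimize a set of changes down to the
+// ones an artificial failure predicate really needs (here 2 and 5).
+#if 0
+class PairFinder : public DeltaAlgorithm {
+protected:
+  virtual bool ExecuteOneTest(const changeset_ty &S) {
+    // The "interesting" behaviour reproduces whenever 2 and 5 both survive.
+    return S.count(2) && S.count(5);
+  }
+};
+
+static DeltaAlgorithm::changeset_ty MinimizePair() {
+  DeltaAlgorithm::changeset_ty Changes;
+  for (unsigned i = 0; i != 8; ++i)
+    Changes.insert(i);
+  PairFinder PF;
+  return PF.Run(Changes);                 // converges on {2, 5}
+}
+#endif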
diff --git a/contrib/llvm/lib/Support/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp
new file mode 100644
index 000000000000..6362aff43a9d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Disassembler.cpp
@@ -0,0 +1,75 @@
+//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler()
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+ // We have the option to enable the udis86 library.
+# if USE_UDIS86
+ return true;
+#else
+ return false;
+#endif
+#else
+ return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+ uint64_t pc) {
+ std::stringstream res;
+
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+ && USE_UDIS86
+ unsigned bits;
+# if defined(__i386__)
+ bits = 32;
+# else
+ bits = 64;
+# endif
+
+ ud_t ud_obj;
+
+ ud_init(&ud_obj);
+ ud_set_input_buffer(&ud_obj, start, length);
+ ud_set_mode(&ud_obj, bits);
+ ud_set_pc(&ud_obj, pc);
+ ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+ res << std::setbase(16)
+ << std::setw(bits/4);
+
+ while (ud_disassemble(&ud_obj)) {
+ res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+ }
+#else
+ res << "No disassembler available. See configure help for options.\n";
+#endif
+
+ return res.str();
+}
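+
+// Usage sketch (illustrative only): dumping freshly generated machine code,
+// if a disassembler was configured in. DumpJITCode is a hypothetical helper.
+#if 0
+#include "llvm/Support/raw_ostream.h"
+
+static void DumpJITCode(uint8_t *Code, size_t Size, uint64_t LoadAddr) {
+  if (!llvm::sys::hasDisassembler())
+    return;                               // built without udis86 support
+  llvm::errs() << llvm::sys::disassembleBuffer(Code, Size, LoadAddr);
+}
+#endif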
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
new file mode 100644
index 000000000000..0813321b6f79
--- /dev/null
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -0,0 +1,658 @@
+//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for generic dwarf information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+/// TagString - Return the string for the specified tag.
+///
+const char *llvm::dwarf::TagString(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_array_type: return "DW_TAG_array_type";
+ case DW_TAG_class_type: return "DW_TAG_class_type";
+ case DW_TAG_entry_point: return "DW_TAG_entry_point";
+ case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type";
+ case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter";
+ case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration";
+ case DW_TAG_label: return "DW_TAG_label";
+ case DW_TAG_lexical_block: return "DW_TAG_lexical_block";
+ case DW_TAG_member: return "DW_TAG_member";
+ case DW_TAG_pointer_type: return "DW_TAG_pointer_type";
+ case DW_TAG_reference_type: return "DW_TAG_reference_type";
+ case DW_TAG_compile_unit: return "DW_TAG_compile_unit";
+ case DW_TAG_string_type: return "DW_TAG_string_type";
+ case DW_TAG_structure_type: return "DW_TAG_structure_type";
+ case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type";
+ case DW_TAG_typedef: return "DW_TAG_typedef";
+ case DW_TAG_union_type: return "DW_TAG_union_type";
+ case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters";
+ case DW_TAG_variant: return "DW_TAG_variant";
+ case DW_TAG_common_block: return "DW_TAG_common_block";
+ case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion";
+ case DW_TAG_inheritance: return "DW_TAG_inheritance";
+ case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine";
+ case DW_TAG_module: return "DW_TAG_module";
+ case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type";
+ case DW_TAG_set_type: return "DW_TAG_set_type";
+ case DW_TAG_subrange_type: return "DW_TAG_subrange_type";
+ case DW_TAG_with_stmt: return "DW_TAG_with_stmt";
+ case DW_TAG_access_declaration: return "DW_TAG_access_declaration";
+ case DW_TAG_base_type: return "DW_TAG_base_type";
+ case DW_TAG_catch_block: return "DW_TAG_catch_block";
+ case DW_TAG_const_type: return "DW_TAG_const_type";
+ case DW_TAG_constant: return "DW_TAG_constant";
+ case DW_TAG_enumerator: return "DW_TAG_enumerator";
+ case DW_TAG_file_type: return "DW_TAG_file_type";
+ case DW_TAG_friend: return "DW_TAG_friend";
+ case DW_TAG_namelist: return "DW_TAG_namelist";
+ case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
+ case DW_TAG_packed_type: return "DW_TAG_packed_type";
+ case DW_TAG_subprogram: return "DW_TAG_subprogram";
+ case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
+ case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter";
+ case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
+ case DW_TAG_try_block: return "DW_TAG_try_block";
+ case DW_TAG_variant_part: return "DW_TAG_variant_part";
+ case DW_TAG_variable: return "DW_TAG_variable";
+ case DW_TAG_volatile_type: return "DW_TAG_volatile_type";
+ case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure";
+ case DW_TAG_restrict_type: return "DW_TAG_restrict_type";
+ case DW_TAG_interface_type: return "DW_TAG_interface_type";
+ case DW_TAG_namespace: return "DW_TAG_namespace";
+ case DW_TAG_imported_module: return "DW_TAG_imported_module";
+ case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type";
+ case DW_TAG_partial_unit: return "DW_TAG_partial_unit";
+ case DW_TAG_imported_unit: return "DW_TAG_imported_unit";
+ case DW_TAG_condition: return "DW_TAG_condition";
+ case DW_TAG_shared_type: return "DW_TAG_shared_type";
+ case DW_TAG_lo_user: return "DW_TAG_lo_user";
+ case DW_TAG_hi_user: return "DW_TAG_hi_user";
+ case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
+ case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
+ case DW_TAG_return_variable: return "DW_TAG_return_variable";
+ case DW_TAG_vector_type: return "DW_TAG_vector_type";
+ }
+ return 0;
+}
+
+/// ChildrenString - Return the string for the specified children flag.
+///
+const char *llvm::dwarf::ChildrenString(unsigned Children) {
+ switch (Children) {
+ case DW_CHILDREN_no: return "DW_CHILDREN_no";
+ case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
+ }
+ return 0;
+}
+
+/// AttributeString - Return the string for the specified attribute.
+///
+const char *llvm::dwarf::AttributeString(unsigned Attribute) {
+ switch (Attribute) {
+ case DW_AT_sibling: return "DW_AT_sibling";
+ case DW_AT_location: return "DW_AT_location";
+ case DW_AT_name: return "DW_AT_name";
+ case DW_AT_ordering: return "DW_AT_ordering";
+ case DW_AT_byte_size: return "DW_AT_byte_size";
+ case DW_AT_bit_offset: return "DW_AT_bit_offset";
+ case DW_AT_bit_size: return "DW_AT_bit_size";
+ case DW_AT_stmt_list: return "DW_AT_stmt_list";
+ case DW_AT_low_pc: return "DW_AT_low_pc";
+ case DW_AT_high_pc: return "DW_AT_high_pc";
+ case DW_AT_language: return "DW_AT_language";
+ case DW_AT_discr: return "DW_AT_discr";
+ case DW_AT_discr_value: return "DW_AT_discr_value";
+ case DW_AT_visibility: return "DW_AT_visibility";
+ case DW_AT_import: return "DW_AT_import";
+ case DW_AT_string_length: return "DW_AT_string_length";
+ case DW_AT_common_reference: return "DW_AT_common_reference";
+ case DW_AT_comp_dir: return "DW_AT_comp_dir";
+ case DW_AT_const_value: return "DW_AT_const_value";
+ case DW_AT_containing_type: return "DW_AT_containing_type";
+ case DW_AT_default_value: return "DW_AT_default_value";
+ case DW_AT_inline: return "DW_AT_inline";
+ case DW_AT_is_optional: return "DW_AT_is_optional";
+ case DW_AT_lower_bound: return "DW_AT_lower_bound";
+ case DW_AT_producer: return "DW_AT_producer";
+ case DW_AT_prototyped: return "DW_AT_prototyped";
+ case DW_AT_return_addr: return "DW_AT_return_addr";
+ case DW_AT_start_scope: return "DW_AT_start_scope";
+ case DW_AT_bit_stride: return "DW_AT_bit_stride";
+ case DW_AT_upper_bound: return "DW_AT_upper_bound";
+ case DW_AT_abstract_origin: return "DW_AT_abstract_origin";
+ case DW_AT_accessibility: return "DW_AT_accessibility";
+ case DW_AT_address_class: return "DW_AT_address_class";
+ case DW_AT_artificial: return "DW_AT_artificial";
+ case DW_AT_base_types: return "DW_AT_base_types";
+ case DW_AT_calling_convention: return "DW_AT_calling_convention";
+ case DW_AT_count: return "DW_AT_count";
+ case DW_AT_data_member_location: return "DW_AT_data_member_location";
+ case DW_AT_decl_column: return "DW_AT_decl_column";
+ case DW_AT_decl_file: return "DW_AT_decl_file";
+ case DW_AT_decl_line: return "DW_AT_decl_line";
+ case DW_AT_declaration: return "DW_AT_declaration";
+ case DW_AT_discr_list: return "DW_AT_discr_list";
+ case DW_AT_encoding: return "DW_AT_encoding";
+ case DW_AT_external: return "DW_AT_external";
+ case DW_AT_frame_base: return "DW_AT_frame_base";
+ case DW_AT_friend: return "DW_AT_friend";
+ case DW_AT_identifier_case: return "DW_AT_identifier_case";
+ case DW_AT_macro_info: return "DW_AT_macro_info";
+ case DW_AT_namelist_item: return "DW_AT_namelist_item";
+ case DW_AT_priority: return "DW_AT_priority";
+ case DW_AT_segment: return "DW_AT_segment";
+ case DW_AT_specification: return "DW_AT_specification";
+ case DW_AT_static_link: return "DW_AT_static_link";
+ case DW_AT_type: return "DW_AT_type";
+ case DW_AT_use_location: return "DW_AT_use_location";
+ case DW_AT_variable_parameter: return "DW_AT_variable_parameter";
+ case DW_AT_virtuality: return "DW_AT_virtuality";
+ case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location";
+ case DW_AT_allocated: return "DW_AT_allocated";
+ case DW_AT_associated: return "DW_AT_associated";
+ case DW_AT_data_location: return "DW_AT_data_location";
+ case DW_AT_byte_stride: return "DW_AT_byte_stride";
+ case DW_AT_entry_pc: return "DW_AT_entry_pc";
+ case DW_AT_use_UTF8: return "DW_AT_use_UTF8";
+ case DW_AT_extension: return "DW_AT_extension";
+ case DW_AT_ranges: return "DW_AT_ranges";
+ case DW_AT_trampoline: return "DW_AT_trampoline";
+ case DW_AT_call_column: return "DW_AT_call_column";
+ case DW_AT_call_file: return "DW_AT_call_file";
+ case DW_AT_call_line: return "DW_AT_call_line";
+ case DW_AT_description: return "DW_AT_description";
+ case DW_AT_binary_scale: return "DW_AT_binary_scale";
+ case DW_AT_decimal_scale: return "DW_AT_decimal_scale";
+ case DW_AT_small: return "DW_AT_small";
+ case DW_AT_decimal_sign: return "DW_AT_decimal_sign";
+ case DW_AT_digit_count: return "DW_AT_digit_count";
+ case DW_AT_picture_string: return "DW_AT_picture_string";
+ case DW_AT_mutable: return "DW_AT_mutable";
+ case DW_AT_threads_scaled: return "DW_AT_threads_scaled";
+ case DW_AT_explicit: return "DW_AT_explicit";
+ case DW_AT_object_pointer: return "DW_AT_object_pointer";
+ case DW_AT_endianity: return "DW_AT_endianity";
+ case DW_AT_elemental: return "DW_AT_elemental";
+ case DW_AT_pure: return "DW_AT_pure";
+ case DW_AT_recursive: return "DW_AT_recursive";
+ case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
+ case DW_AT_sf_names: return "DW_AT_sf_names";
+ case DW_AT_src_info: return "DW_AT_src_info";
+ case DW_AT_mac_info: return "DW_AT_mac_info";
+ case DW_AT_src_coords: return "DW_AT_src_coords";
+ case DW_AT_body_begin: return "DW_AT_body_begin";
+ case DW_AT_body_end: return "DW_AT_body_end";
+ case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
+ case DW_AT_lo_user: return "DW_AT_lo_user";
+ case DW_AT_hi_user: return "DW_AT_hi_user";
+ case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
+ case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
+ case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
+ case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
+ case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
+ case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
+ case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr";
+ case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name";
+ case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
+ case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
+ case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
+ case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
+ }
+ return 0;
+}
+
+/// FormEncodingString - Return the string for the specified form encoding.
+///
+const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_FORM_addr: return "DW_FORM_addr";
+ case DW_FORM_block2: return "DW_FORM_block2";
+ case DW_FORM_block4: return "DW_FORM_block4";
+ case DW_FORM_data2: return "DW_FORM_data2";
+ case DW_FORM_data4: return "DW_FORM_data4";
+ case DW_FORM_data8: return "DW_FORM_data8";
+ case DW_FORM_string: return "DW_FORM_string";
+ case DW_FORM_block: return "DW_FORM_block";
+ case DW_FORM_block1: return "DW_FORM_block1";
+ case DW_FORM_data1: return "DW_FORM_data1";
+ case DW_FORM_flag: return "DW_FORM_flag";
+ case DW_FORM_sdata: return "DW_FORM_sdata";
+ case DW_FORM_strp: return "DW_FORM_strp";
+ case DW_FORM_udata: return "DW_FORM_udata";
+ case DW_FORM_ref_addr: return "DW_FORM_ref_addr";
+ case DW_FORM_ref1: return "DW_FORM_ref1";
+ case DW_FORM_ref2: return "DW_FORM_ref2";
+ case DW_FORM_ref4: return "DW_FORM_ref4";
+ case DW_FORM_ref8: return "DW_FORM_ref8";
+ case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
+ case DW_FORM_indirect: return "DW_FORM_indirect";
+ }
+ return 0;
+}
+
+/// OperationEncodingString - Return the string for the specified operation
+/// encoding.
+const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_OP_addr: return "DW_OP_addr";
+ case DW_OP_deref: return "DW_OP_deref";
+ case DW_OP_const1u: return "DW_OP_const1u";
+ case DW_OP_const1s: return "DW_OP_const1s";
+ case DW_OP_const2u: return "DW_OP_const2u";
+ case DW_OP_const2s: return "DW_OP_const2s";
+ case DW_OP_const4u: return "DW_OP_const4u";
+ case DW_OP_const4s: return "DW_OP_const4s";
+ case DW_OP_const8u: return "DW_OP_const8u";
+ case DW_OP_const8s: return "DW_OP_const8s";
+ case DW_OP_constu: return "DW_OP_constu";
+ case DW_OP_consts: return "DW_OP_consts";
+ case DW_OP_dup: return "DW_OP_dup";
+ case DW_OP_drop: return "DW_OP_drop";
+ case DW_OP_over: return "DW_OP_over";
+ case DW_OP_pick: return "DW_OP_pick";
+ case DW_OP_swap: return "DW_OP_swap";
+ case DW_OP_rot: return "DW_OP_rot";
+ case DW_OP_xderef: return "DW_OP_xderef";
+ case DW_OP_abs: return "DW_OP_abs";
+ case DW_OP_and: return "DW_OP_and";
+ case DW_OP_div: return "DW_OP_div";
+ case DW_OP_minus: return "DW_OP_minus";
+ case DW_OP_mod: return "DW_OP_mod";
+ case DW_OP_mul: return "DW_OP_mul";
+ case DW_OP_neg: return "DW_OP_neg";
+ case DW_OP_not: return "DW_OP_not";
+ case DW_OP_or: return "DW_OP_or";
+ case DW_OP_plus: return "DW_OP_plus";
+ case DW_OP_plus_uconst: return "DW_OP_plus_uconst";
+ case DW_OP_shl: return "DW_OP_shl";
+ case DW_OP_shr: return "DW_OP_shr";
+ case DW_OP_shra: return "DW_OP_shra";
+ case DW_OP_xor: return "DW_OP_xor";
+ case DW_OP_skip: return "DW_OP_skip";
+ case DW_OP_bra: return "DW_OP_bra";
+ case DW_OP_eq: return "DW_OP_eq";
+ case DW_OP_ge: return "DW_OP_ge";
+ case DW_OP_gt: return "DW_OP_gt";
+ case DW_OP_le: return "DW_OP_le";
+ case DW_OP_lt: return "DW_OP_lt";
+ case DW_OP_ne: return "DW_OP_ne";
+ case DW_OP_lit0: return "DW_OP_lit0";
+ case DW_OP_lit1: return "DW_OP_lit1";
+ case DW_OP_lit2: return "DW_OP_lit2";
+ case DW_OP_lit3: return "DW_OP_lit3";
+ case DW_OP_lit4: return "DW_OP_lit4";
+ case DW_OP_lit5: return "DW_OP_lit5";
+ case DW_OP_lit6: return "DW_OP_lit6";
+ case DW_OP_lit7: return "DW_OP_lit7";
+ case DW_OP_lit8: return "DW_OP_lit8";
+ case DW_OP_lit9: return "DW_OP_lit9";
+ case DW_OP_lit10: return "DW_OP_lit10";
+ case DW_OP_lit11: return "DW_OP_lit11";
+ case DW_OP_lit12: return "DW_OP_lit12";
+ case DW_OP_lit13: return "DW_OP_lit13";
+ case DW_OP_lit14: return "DW_OP_lit14";
+ case DW_OP_lit15: return "DW_OP_lit15";
+ case DW_OP_lit16: return "DW_OP_lit16";
+ case DW_OP_lit17: return "DW_OP_lit17";
+ case DW_OP_lit18: return "DW_OP_lit18";
+ case DW_OP_lit19: return "DW_OP_lit19";
+ case DW_OP_lit20: return "DW_OP_lit20";
+ case DW_OP_lit21: return "DW_OP_lit21";
+ case DW_OP_lit22: return "DW_OP_lit22";
+ case DW_OP_lit23: return "DW_OP_lit23";
+ case DW_OP_lit24: return "DW_OP_lit24";
+ case DW_OP_lit25: return "DW_OP_lit25";
+ case DW_OP_lit26: return "DW_OP_lit26";
+ case DW_OP_lit27: return "DW_OP_lit27";
+ case DW_OP_lit28: return "DW_OP_lit28";
+ case DW_OP_lit29: return "DW_OP_lit29";
+ case DW_OP_lit30: return "DW_OP_lit30";
+ case DW_OP_lit31: return "DW_OP_lit31";
+ case DW_OP_reg0: return "DW_OP_reg0";
+ case DW_OP_reg1: return "DW_OP_reg1";
+ case DW_OP_reg2: return "DW_OP_reg2";
+ case DW_OP_reg3: return "DW_OP_reg3";
+ case DW_OP_reg4: return "DW_OP_reg4";
+ case DW_OP_reg5: return "DW_OP_reg5";
+ case DW_OP_reg6: return "DW_OP_reg6";
+ case DW_OP_reg7: return "DW_OP_reg7";
+ case DW_OP_reg8: return "DW_OP_reg8";
+ case DW_OP_reg9: return "DW_OP_reg9";
+ case DW_OP_reg10: return "DW_OP_reg10";
+ case DW_OP_reg11: return "DW_OP_reg11";
+ case DW_OP_reg12: return "DW_OP_reg12";
+ case DW_OP_reg13: return "DW_OP_reg13";
+ case DW_OP_reg14: return "DW_OP_reg14";
+ case DW_OP_reg15: return "DW_OP_reg15";
+ case DW_OP_reg16: return "DW_OP_reg16";
+ case DW_OP_reg17: return "DW_OP_reg17";
+ case DW_OP_reg18: return "DW_OP_reg18";
+ case DW_OP_reg19: return "DW_OP_reg19";
+ case DW_OP_reg20: return "DW_OP_reg20";
+ case DW_OP_reg21: return "DW_OP_reg21";
+ case DW_OP_reg22: return "DW_OP_reg22";
+ case DW_OP_reg23: return "DW_OP_reg23";
+ case DW_OP_reg24: return "DW_OP_reg24";
+ case DW_OP_reg25: return "DW_OP_reg25";
+ case DW_OP_reg26: return "DW_OP_reg26";
+ case DW_OP_reg27: return "DW_OP_reg27";
+ case DW_OP_reg28: return "DW_OP_reg28";
+ case DW_OP_reg29: return "DW_OP_reg29";
+ case DW_OP_reg30: return "DW_OP_reg30";
+ case DW_OP_reg31: return "DW_OP_reg31";
+ case DW_OP_breg0: return "DW_OP_breg0";
+ case DW_OP_breg1: return "DW_OP_breg1";
+ case DW_OP_breg2: return "DW_OP_breg2";
+ case DW_OP_breg3: return "DW_OP_breg3";
+ case DW_OP_breg4: return "DW_OP_breg4";
+ case DW_OP_breg5: return "DW_OP_breg5";
+ case DW_OP_breg6: return "DW_OP_breg6";
+ case DW_OP_breg7: return "DW_OP_breg7";
+ case DW_OP_breg8: return "DW_OP_breg8";
+ case DW_OP_breg9: return "DW_OP_breg9";
+ case DW_OP_breg10: return "DW_OP_breg10";
+ case DW_OP_breg11: return "DW_OP_breg11";
+ case DW_OP_breg12: return "DW_OP_breg12";
+ case DW_OP_breg13: return "DW_OP_breg13";
+ case DW_OP_breg14: return "DW_OP_breg14";
+ case DW_OP_breg15: return "DW_OP_breg15";
+ case DW_OP_breg16: return "DW_OP_breg16";
+ case DW_OP_breg17: return "DW_OP_breg17";
+ case DW_OP_breg18: return "DW_OP_breg18";
+ case DW_OP_breg19: return "DW_OP_breg19";
+ case DW_OP_breg20: return "DW_OP_breg20";
+ case DW_OP_breg21: return "DW_OP_breg21";
+ case DW_OP_breg22: return "DW_OP_breg22";
+ case DW_OP_breg23: return "DW_OP_breg23";
+ case DW_OP_breg24: return "DW_OP_breg24";
+ case DW_OP_breg25: return "DW_OP_breg25";
+ case DW_OP_breg26: return "DW_OP_breg26";
+ case DW_OP_breg27: return "DW_OP_breg27";
+ case DW_OP_breg28: return "DW_OP_breg28";
+ case DW_OP_breg29: return "DW_OP_breg29";
+ case DW_OP_breg30: return "DW_OP_breg30";
+ case DW_OP_breg31: return "DW_OP_breg31";
+ case DW_OP_regx: return "DW_OP_regx";
+ case DW_OP_fbreg: return "DW_OP_fbreg";
+ case DW_OP_bregx: return "DW_OP_bregx";
+ case DW_OP_piece: return "DW_OP_piece";
+ case DW_OP_deref_size: return "DW_OP_deref_size";
+ case DW_OP_xderef_size: return "DW_OP_xderef_size";
+ case DW_OP_nop: return "DW_OP_nop";
+ case DW_OP_push_object_address: return "DW_OP_push_object_address";
+ case DW_OP_call2: return "DW_OP_call2";
+ case DW_OP_call4: return "DW_OP_call4";
+ case DW_OP_call_ref: return "DW_OP_call_ref";
+ case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
+ case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
+ case DW_OP_bit_piece: return "DW_OP_bit_piece";
+ case DW_OP_lo_user: return "DW_OP_lo_user";
+ case DW_OP_hi_user: return "DW_OP_hi_user";
+ }
+ return 0;
+}
+
+/// AttributeEncodingString - Return the string for the specified attribute
+/// encoding.
+const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_ATE_address: return "DW_ATE_address";
+ case DW_ATE_boolean: return "DW_ATE_boolean";
+ case DW_ATE_complex_float: return "DW_ATE_complex_float";
+ case DW_ATE_float: return "DW_ATE_float";
+ case DW_ATE_signed: return "DW_ATE_signed";
+ case DW_ATE_signed_char: return "DW_ATE_signed_char";
+ case DW_ATE_unsigned: return "DW_ATE_unsigned";
+ case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
+ case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
+ case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
+ case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
+ case DW_ATE_edited: return "DW_ATE_edited";
+ case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed";
+ case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed";
+ case DW_ATE_decimal_float: return "DW_ATE_decimal_float";
+ case DW_ATE_lo_user: return "DW_ATE_lo_user";
+ case DW_ATE_hi_user: return "DW_ATE_hi_user";
+ }
+ return 0;
+}
+
+/// DecimalSignString - Return the string for the specified decimal sign
+/// attribute.
+const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
+ switch (Sign) {
+ case DW_DS_unsigned: return "DW_DS_unsigned";
+ case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch";
+ case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch";
+ case DW_DS_leading_separate: return "DW_DS_leading_separate";
+ case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
+ }
+ return 0;
+}
+
+/// EndianityString - Return the string for the specified endianity.
+///
+const char *llvm::dwarf::EndianityString(unsigned Endian) {
+ switch (Endian) {
+ case DW_END_default: return "DW_END_default";
+ case DW_END_big: return "DW_END_big";
+ case DW_END_little: return "DW_END_little";
+ case DW_END_lo_user: return "DW_END_lo_user";
+ case DW_END_hi_user: return "DW_END_hi_user";
+ }
+ return 0;
+}
+
+/// AccessibilityString - Return the string for the specified accessibility.
+///
+const char *llvm::dwarf::AccessibilityString(unsigned Access) {
+ switch (Access) {
+ // Accessibility codes
+ case DW_ACCESS_public: return "DW_ACCESS_public";
+ case DW_ACCESS_protected: return "DW_ACCESS_protected";
+ case DW_ACCESS_private: return "DW_ACCESS_private";
+ }
+ return 0;
+}
+
+/// VisibilityString - Return the string for the specified visibility.
+///
+const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
+ switch (Visibility) {
+ case DW_VIS_local: return "DW_VIS_local";
+ case DW_VIS_exported: return "DW_VIS_exported";
+ case DW_VIS_qualified: return "DW_VIS_qualified";
+ }
+ return 0;
+}
+
+/// VirtualityString - Return the string for the specified virtuality.
+///
+const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
+ switch (Virtuality) {
+ case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
+ case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
+ case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
+ }
+ return 0;
+}
+
+/// LanguageString - Return the string for the specified language.
+///
+const char *llvm::dwarf::LanguageString(unsigned Language) {
+ switch (Language) {
+ case DW_LANG_C89: return "DW_LANG_C89";
+ case DW_LANG_C: return "DW_LANG_C";
+ case DW_LANG_Ada83: return "DW_LANG_Ada83";
+ case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus";
+ case DW_LANG_Cobol74: return "DW_LANG_Cobol74";
+ case DW_LANG_Cobol85: return "DW_LANG_Cobol85";
+ case DW_LANG_Fortran77: return "DW_LANG_Fortran77";
+ case DW_LANG_Fortran90: return "DW_LANG_Fortran90";
+ case DW_LANG_Pascal83: return "DW_LANG_Pascal83";
+ case DW_LANG_Modula2: return "DW_LANG_Modula2";
+ case DW_LANG_Java: return "DW_LANG_Java";
+ case DW_LANG_C99: return "DW_LANG_C99";
+ case DW_LANG_Ada95: return "DW_LANG_Ada95";
+ case DW_LANG_Fortran95: return "DW_LANG_Fortran95";
+ case DW_LANG_PLI: return "DW_LANG_PLI";
+ case DW_LANG_ObjC: return "DW_LANG_ObjC";
+ case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus";
+ case DW_LANG_UPC: return "DW_LANG_UPC";
+ case DW_LANG_D: return "DW_LANG_D";
+ case DW_LANG_lo_user: return "DW_LANG_lo_user";
+ case DW_LANG_hi_user: return "DW_LANG_hi_user";
+ }
+ return 0;
+}
+
+/// CaseString - Return the string for the specified identifier case.
+///
+const char *llvm::dwarf::CaseString(unsigned Case) {
+ switch (Case) {
+ case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
+ case DW_ID_up_case: return "DW_ID_up_case";
+ case DW_ID_down_case: return "DW_ID_down_case";
+ case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
+ }
+ return 0;
+}
+
+/// ConventionString - Return the string for the specified calling convention.
+///
+const char *llvm::dwarf::ConventionString(unsigned Convention) {
+ switch (Convention) {
+ case DW_CC_normal: return "DW_CC_normal";
+ case DW_CC_program: return "DW_CC_program";
+ case DW_CC_nocall: return "DW_CC_nocall";
+ case DW_CC_lo_user: return "DW_CC_lo_user";
+ case DW_CC_hi_user: return "DW_CC_hi_user";
+ }
+ return 0;
+}
+
+/// InlineCodeString - Return the string for the specified inline code.
+///
+const char *llvm::dwarf::InlineCodeString(unsigned Code) {
+ switch (Code) {
+ case DW_INL_not_inlined: return "DW_INL_not_inlined";
+ case DW_INL_inlined: return "DW_INL_inlined";
+ case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
+ case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
+ }
+ return 0;
+}
+
+/// ArrayOrderString - Return the string for the specified array order.
+///
+const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
+ switch (Order) {
+ case DW_ORD_row_major: return "DW_ORD_row_major";
+ case DW_ORD_col_major: return "DW_ORD_col_major";
+ }
+ return 0;
+}
+
+/// DiscriminantString - Return the string for the specified discriminant
+/// descriptor.
+const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
+ switch (Discriminant) {
+ case DW_DSC_label: return "DW_DSC_label";
+ case DW_DSC_range: return "DW_DSC_range";
+ }
+ return 0;
+}
+
+/// LNStandardString - Return the string for the specified line number standard.
+///
+const char *llvm::dwarf::LNStandardString(unsigned Standard) {
+ switch (Standard) {
+ case DW_LNS_copy: return "DW_LNS_copy";
+ case DW_LNS_advance_pc: return "DW_LNS_advance_pc";
+ case DW_LNS_advance_line: return "DW_LNS_advance_line";
+ case DW_LNS_set_file: return "DW_LNS_set_file";
+ case DW_LNS_set_column: return "DW_LNS_set_column";
+ case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt";
+ case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block";
+ case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc";
+ case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc";
+ case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end";
+ case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
+ case DW_LNS_set_isa: return "DW_LNS_set_isa";
+ }
+ return 0;
+}
+
+/// LNExtendedString - Return the string for the specified line number extended
+/// opcode encodings.
+const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
+ switch (Encoding) {
+ // Line Number Extended Opcode Encodings
+ case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
+ case DW_LNE_set_address: return "DW_LNE_set_address";
+ case DW_LNE_define_file: return "DW_LNE_define_file";
+ case DW_LNE_lo_user: return "DW_LNE_lo_user";
+ case DW_LNE_hi_user: return "DW_LNE_hi_user";
+ }
+ return 0;
+}
+
+/// MacinfoString - Return the string for the specified macinfo type encodings.
+///
+const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
+ switch (Encoding) {
+ // Macinfo Type Encodings
+ case DW_MACINFO_define: return "DW_MACINFO_define";
+ case DW_MACINFO_undef: return "DW_MACINFO_undef";
+ case DW_MACINFO_start_file: return "DW_MACINFO_start_file";
+ case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
+ case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
+ }
+ return 0;
+}
+
+/// CallFrameString - Return the string for the specified call frame instruction
+/// encodings.
+const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
+ case DW_CFA_offset: return "DW_CFA_offset";
+ case DW_CFA_restore: return "DW_CFA_restore";
+ case DW_CFA_set_loc: return "DW_CFA_set_loc";
+ case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
+ case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
+ case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
+ case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
+ case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
+ case DW_CFA_undefined: return "DW_CFA_undefined";
+ case DW_CFA_same_value: return "DW_CFA_same_value";
+ case DW_CFA_register: return "DW_CFA_register";
+ case DW_CFA_remember_state: return "DW_CFA_remember_state";
+ case DW_CFA_restore_state: return "DW_CFA_restore_state";
+ case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
+ case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
+ case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
+ case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
+ case DW_CFA_expression: return "DW_CFA_expression";
+ case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
+ case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
+ case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
+ case DW_CFA_val_offset: return "DW_CFA_val_offset";
+ case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
+ case DW_CFA_val_expression: return "DW_CFA_val_expression";
+ case DW_CFA_lo_user: return "DW_CFA_lo_user";
+ case DW_CFA_hi_user: return "DW_CFA_hi_user";
+ }
+ return 0;
+}
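[Editorial note, not part of the imported sources: the lookup helpers in this file share one contract visible above -- a known value maps to its DW_* spelling, and anything else yields a null pointer. A small sketch of how a caller would guard for that; printTag() is a hypothetical function.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/Dwarf.h"
#include <cstdio>

void printTag(unsigned Tag) {
  if (const char *S = llvm::dwarf::TagString(Tag))
    std::printf("%s\n", S);                  // e.g. "DW_TAG_subprogram"
  else
    std::printf("unknown tag 0x%x\n", Tag);  // helpers return 0 when unknown
}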
diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp
new file mode 100644
index 000000000000..455c3801cc68
--- /dev/null
+++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp
@@ -0,0 +1,170 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system DynamicLibrary concept.
+//
+// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
+// not thread safe!
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <vector>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+static std::map<std::string, void*> *ExplicitSymbols = 0;
+
+namespace {
+
+struct ExplicitSymbolsDeleter {
+ ~ExplicitSymbolsDeleter() {
+ if (ExplicitSymbols)
+ delete ExplicitSymbols;
+ }
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
+
+void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+ void *symbolValue) {
+ if (ExplicitSymbols == 0)
+ ExplicitSymbols = new std::map<std::string, void*>();
+ (*ExplicitSymbols)[symbolName] = symbolValue;
+}
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
+
+#else
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<void *> *OpenedHandles = 0;
+
+
+static SmartMutex<true>& getMutex() {
+ static SmartMutex<true> HandlesMutex;
+ return HandlesMutex;
+}
+
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
+ if (H == 0) {
+ if (ErrMsg) *ErrMsg = dlerror();
+ return true;
+ }
+#ifdef __CYGWIN__
+  // With the handle returned by dlopen(NULL, RTLD_GLOBAL), Cygwin searches
+  // symbols only in the main executable, so use RTLD_DEFAULT instead.
+ if (Filename == NULL)
+ H = RTLD_DEFAULT;
+#endif
+ SmartScopedLock<true> Lock(getMutex());
+ if (OpenedHandles == 0)
+ OpenedHandles = new std::vector<void *>();
+ OpenedHandles->push_back(H);
+ return false;
+}
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
+ return true;
+}
+#endif
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+
+ if (I != E)
+ return I->second;
+ }
+
+#if HAVE_DLFCN_H
+ // Now search the libraries.
+ SmartScopedLock<true> Lock(getMutex());
+ if (OpenedHandles) {
+ for (std::vector<void *>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ //lt_ptr ptr = lt_dlsym(*I, symbolName);
+ void *ptr = dlsym(*I, symbolName);
+ if (ptr) {
+ return ptr;
+ }
+ }
+ }
+#endif
+
+ if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
+ return Result;
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__)
+ {
+ EXPLICIT_SYMBOL(stderr);
+ EXPLICIT_SYMBOL(stdout);
+ EXPLICIT_SYMBOL(stdin);
+ }
+#else
+ // For everything else, we want to check to make sure the symbol isn't defined
+ // as a macro before using EXPLICIT_SYMBOL.
+ {
+#ifndef stdin
+ EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+ EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+ EXPLICIT_SYMBOL(stderr);
+#endif
+ }
+#endif
+#undef EXPLICIT_SYMBOL
+
+ return 0;
+}
+
+#endif // LLVM_ON_WIN32
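[Editorial note, not part of the imported sources: a brief sketch of the API implemented above. Libraries are opened process-wide and stay open, and symbols registered with AddSymbol() are consulted before any opened handle. loadAndFind() is a hypothetical helper.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/DynamicLibrary.h"
#include <string>

void *loadAndFind(const char *Lib, const char *Sym, std::string &Err) {
  // LoadLibraryPermanently() returns true on failure and fills Err.
  if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Lib, &Err))
    return 0;
  // Explicit symbols (AddSymbol) are searched first, then every open handle.
  return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Sym);
}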
diff --git a/contrib/llvm/lib/Support/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp
new file mode 100644
index 000000000000..18c658173a7a
--- /dev/null
+++ b/contrib/llvm/lib/Support/Errno.cpp
@@ -0,0 +1,74 @@
+//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the errno wrappers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+
+#if HAVE_STRING_H
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+#if HAVE_ERRNO_H
+std::string StrError() {
+ return StrError(errno);
+}
+#endif // HAVE_ERRNO_H
+
+std::string StrError(int errnum) {
+ const int MaxErrStrLen = 2000;
+ char buffer[MaxErrStrLen];
+ buffer[0] = '\0';
+ char* str = buffer;
+#ifdef HAVE_STRERROR_R
+ // strerror_r is thread-safe.
+ if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum,buffer,MaxErrStrLen-1);
+# else
+ strerror_r(errnum,buffer,MaxErrStrLen-1);
+# endif
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
+ if (errnum)
+ strerror_s(buffer, errnum);
+#elif defined(HAVE_STRERROR)
+  // Copy the thread-unsafe result of strerror into the buffer as quickly as
+  // possible to minimize the impact of concurrent strerror calls from other
+  // threads.
+ if (errnum)
+ strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
+ buffer[MaxErrStrLen-1] = '\0';
+#else
+ // Strange that this system doesn't even have strerror
+ // but, oh well, just use a generic message
+ sprintf(buffer, "Error #%d", errnum);
+#endif
+ return str;
+}
+
+} // namespace sys
+} // namespace llvm
+
+#endif // HAVE_STRING_H
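[Editorial note, not part of the imported sources: a typical use of the wrapper above -- convert the errno left by a failed libc call into a printable message without caring which strerror variant the platform provides. openOrReport() is a hypothetical caller.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/Errno.h"
#include <cerrno>
#include <cstdio>

bool openOrReport(const char *Path) {
  std::FILE *F = std::fopen(Path, "r");
  if (!F) {
    // StrError(errno) picks strerror_r/strerror_s/strerror as available.
    std::fprintf(stderr, "%s: %s\n", Path, llvm::sys::StrError(errno).c_str());
    return false;
  }
  std::fclose(F);
  return true;
}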
diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp
new file mode 100644
index 000000000000..e6cc57db8243
--- /dev/null
+++ b/contrib/llvm/lib/Support/ErrorHandling.cpp
@@ -0,0 +1,99 @@
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
+using namespace llvm;
+
+static fatal_error_handler_t ErrorHandler = 0;
+static void *ErrorHandlerUserData = 0;
+
+void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
+ void *user_data) {
+ assert(!llvm_is_multithreaded() &&
+ "Cannot register error handlers after starting multithreaded mode!\n");
+ assert(!ErrorHandler && "Error handler already registered!\n");
+ ErrorHandler = handler;
+ ErrorHandlerUserData = user_data;
+}
+
+void llvm::remove_fatal_error_handler() {
+ ErrorHandler = 0;
+}
+
+void llvm::report_fatal_error(const char *Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const std::string &Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(StringRef Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const Twine &Reason) {
+ if (ErrorHandler) {
+ ErrorHandler(ErrorHandlerUserData, Reason.str());
+ } else {
+ // Blast the result out to stderr. We don't try hard to make sure this
+ // succeeds (e.g. handling EINTR) and we can't use errs() here because
+ // raw ostreams can call report_fatal_error.
+ SmallVector<char, 64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "LLVM ERROR: " << Reason << "\n";
+ StringRef MessageStr = OS.str();
+ ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+ (void)written; // If something went wrong, we deliberately just give up.
+ }
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+
+ exit(1);
+}
+
+void llvm::llvm_unreachable_internal(const char *msg, const char *file,
+ unsigned line) {
+ // This code intentionally doesn't call the ErrorHandler callback, because
+ // llvm_unreachable is intended to be used to indicate "impossible"
+ // situations, and not legitimate runtime errors.
+ if (msg)
+ dbgs() << msg << "\n";
+ dbgs() << "UNREACHABLE executed";
+ if (file)
+ dbgs() << " at " << file << ":" << line;
+ dbgs() << "!\n";
+ abort();
+}
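[Editorial note, not part of the imported sources: a sketch of how a client overrides the default stderr path above. The two-argument handler signature matches the call made inside report_fatal_error(); the handler name and exit code are arbitrary.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <cstdlib>
#include <string>

static void MyFatalHandler(void *UserData, const std::string &Reason) {
  std::fprintf(stderr, "mytool fatal error: %s\n", Reason.c_str());
  std::exit(2);  // report_fatal_error() never returns to its caller
}

int main() {
  // Must be installed before enabling multithreading (see the assert above).
  llvm::install_fatal_error_handler(MyFatalHandler, 0);
  llvm::report_fatal_error("unrecoverable condition");
}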
diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp
new file mode 100644
index 000000000000..4c8c0c63ffc4
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileUtilities.cpp
@@ -0,0 +1,281 @@
+//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+static bool isSignedChar(char C) {
+ return (C == '+' || C == '-');
+}
+
+static bool isExponentChar(char C) {
+ switch (C) {
+ case 'D': // Strange exponential notation.
+ case 'd': // Strange exponential notation.
+ case 'e':
+ case 'E': return true;
+ default: return false;
+ }
+}
+
+static bool isNumberChar(char C) {
+ switch (C) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '.': return true;
+ default: return isSignedChar(C) || isExponentChar(C);
+ }
+}
+
+static const char *BackupNumber(const char *Pos, const char *FirstChar) {
+ // If we didn't stop in the middle of a number, don't backup.
+ if (!isNumberChar(*Pos)) return Pos;
+
+ // Otherwise, return to the start of the number.
+ bool HasPeriod = false;
+ while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+ // Backup over at most one period.
+ if (Pos[-1] == '.') {
+ if (HasPeriod)
+ break;
+ HasPeriod = true;
+ }
+
+ --Pos;
+ if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
+ break;
+ }
+ return Pos;
+}
+
+/// EndOfNumber - Return the first character that is not part of the specified
+/// number. This assumes that the buffer is null terminated, so it won't fall
+/// off the end.
+static const char *EndOfNumber(const char *Pos) {
+ while (isNumberChar(*Pos))
+ ++Pos;
+ return Pos;
+}
+
+/// CompareNumbers - compare two numbers, returning true if they are different.
+static bool CompareNumbers(const char *&F1P, const char *&F2P,
+ const char *F1End, const char *F2End,
+ double AbsTolerance, double RelTolerance,
+ std::string *ErrorMsg) {
+ const char *F1NumEnd, *F2NumEnd;
+ double V1 = 0.0, V2 = 0.0;
+
+ // If one of the positions is at a space and the other isn't, chomp up 'til
+ // the end of the space.
+ while (isspace(*F1P) && F1P != F1End)
+ ++F1P;
+ while (isspace(*F2P) && F2P != F2End)
+ ++F2P;
+
+ // If we stop on numbers, compare their difference.
+ if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
+ // The diff failed.
+ F1NumEnd = F1P;
+ F2NumEnd = F2P;
+ } else {
+ // Note that some ugliness is built into this to permit support for numbers
+ // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This
+ // occurs in 200.sixtrack in spec2k.
+ V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
+ V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
+
+ if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
+
+ V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
+ F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
+ }
+
+ if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
+
+ V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
+ F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
+ }
+ }
+
+ if (F1NumEnd == F1P || F2NumEnd == F2P) {
+ if (ErrorMsg) {
+ *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
+ *ErrorMsg += F1P[0];
+ *ErrorMsg += "' and '";
+ *ErrorMsg += F2P[0];
+ *ErrorMsg += "'";
+ }
+ return true;
+ }
+
+ // Check to see if these are inside the absolute tolerance
+ if (AbsTolerance < std::abs(V1-V2)) {
+ // Nope, check the relative tolerance...
+ double Diff;
+ if (V2)
+ Diff = std::abs(V1/V2 - 1.0);
+ else if (V1)
+ Diff = std::abs(V2/V1 - 1.0);
+ else
+ Diff = 0; // Both zero.
+ if (Diff > RelTolerance) {
+ if (ErrorMsg) {
+ raw_string_ostream(*ErrorMsg)
+ << "Compared: " << V1 << " and " << V2 << '\n'
+ << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n'
+ << "Out of tolerance: rel/abs: " << RelTolerance << '/'
+ << AbsTolerance;
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, advance our read pointers to the end of the numbers.
+ F1P = F1NumEnd; F2P = F2NumEnd;
+ return false;
+}
+
+/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
+/// files match, 1 if they are different, and 2 if there is a file error. This
+/// function differs from DiffFiles in that you can specify an absolute and
+/// relative FP error that is allowed to exist. If you specify a string to fill
+/// in for the error option, it will set the string to an error message if an
+/// error occurs, allowing the caller to distinguish between a failed diff and a
+/// file system error.
+///
+int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+ const sys::PathWithStatus &FileB,
+ double AbsTol, double RelTol,
+ std::string *Error) {
+ const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error);
+ if (!FileAStat)
+ return 2;
+ const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error);
+ if (!FileBStat)
+ return 2;
+
+ // Check for zero length files because some systems croak when you try to
+ // mmap an empty file.
+ size_t A_size = FileAStat->getSize();
+ size_t B_size = FileBStat->getSize();
+
+ // If they are both zero sized then they're the same
+ if (A_size == 0 && B_size == 0)
+ return 0;
+
+ // If only one of them is zero sized then they can't be the same
+ if ((A_size == 0 || B_size == 0)) {
+ if (Error)
+ *Error = "Files differ: one is zero-sized, the other isn't";
+ return 1;
+ }
+
+  // Now it's safe to mmap the files into memory because both files
+ // have a non-zero size.
+ error_code ec;
+ OwningPtr<MemoryBuffer> F1;
+ if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+ OwningPtr<MemoryBuffer> F2;
+ if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+
+ // Okay, now that we opened the files, scan them for the first difference.
+ const char *File1Start = F1->getBufferStart();
+ const char *File2Start = F2->getBufferStart();
+ const char *File1End = F1->getBufferEnd();
+ const char *File2End = F2->getBufferEnd();
+ const char *F1P = File1Start;
+ const char *F2P = File2Start;
+
+ // Are the buffers identical? Common case: Handle this efficiently.
+ if (A_size == B_size &&
+ std::memcmp(File1Start, File2Start, A_size) == 0)
+ return 0;
+
+  // Otherwise the contents differ; if no tolerances are set, we are done.
+ if (AbsTol == 0 && RelTol == 0) {
+ if (Error)
+ *Error = "Files differ without tolerance allowance";
+ return 1; // Files different!
+ }
+
+ bool CompareFailed = false;
+ while (1) {
+ // Scan for the end of file or next difference.
+ while (F1P < File1End && F2P < File2End && *F1P == *F2P)
+ ++F1P, ++F2P;
+
+ if (F1P >= File1End || F2P >= File2End) break;
+
+ // Okay, we must have found a difference. Backup to the start of the
+ // current number each stream is at so that we can compare from the
+ // beginning.
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
+ CompareFailed = true;
+ break;
+ }
+ }
+
+ // Okay, we reached the end of file. If both files are at the end, we
+ // succeeded.
+ bool F1AtEnd = F1P >= File1End;
+ bool F2AtEnd = F2P >= File2End;
+ if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
+ // Else, we might have run off the end due to a number: backup and retry.
+ if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
+ if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
+ CompareFailed = true;
+
+ // If we found the end, we succeeded.
+ if (F1P < File1End || F2P < File2End)
+ CompareFailed = true;
+ }
+
+ return CompareFailed;
+}
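[Editorial note, not part of the imported sources: a sketch of the entry point above from a caller's perspective. The return codes follow the doc comment: 0 = match, 1 = differ, 2 = file error. outputsMatch() and the tolerance values are hypothetical.]

// Illustrative sketch only -- not part of this patch.
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include <cstdio>
#include <string>

bool outputsMatch(const char *A, const char *B) {
  std::string Err;
  int Res = llvm::DiffFilesWithTolerance(llvm::sys::PathWithStatus(A),
                                         llvm::sys::PathWithStatus(B),
                                         /*AbsTol=*/1e-6, /*RelTol=*/1e-6,
                                         &Err);
  if (Res == 2)
    std::fprintf(stderr, "diff failed: %s\n", Err.c_str());
  return Res == 0;
}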
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
new file mode 100644
index 000000000000..1568342e9c9d
--- /dev/null
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -0,0 +1,426 @@
+//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hash set that can be used to remove duplication of
+// nodes in a graph. This code was originally created by Chris Lattner for use
+// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+ // This is adapted from SuperFastHash by Paul Hsieh.
+ unsigned Hash = static_cast<unsigned>(Size);
+ for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
+ unsigned Data = *BP;
+ Hash += Data & 0xFFFF;
+ unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+ Hash = (Hash << 16) ^ Tmp;
+ Hash += Hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ Hash ^= Hash << 3;
+ Hash += Hash >> 5;
+ Hash ^= Hash << 4;
+ Hash += Hash >> 17;
+ Hash ^= Hash << 25;
+ Hash += Hash >> 6;
+ return Hash;
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size) return false;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeID Implementation
+
+/// Add* - Add various data types to Bit data.
+///
+void FoldingSetNodeID::AddPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter however, because hashing on
+  // pointer values is inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ intptr_t PtrI = (intptr_t)Ptr;
+ Bits.push_back(unsigned(PtrI));
+ if (sizeof(intptr_t) > sizeof(unsigned))
+ Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
+}
+void FoldingSetNodeID::AddInteger(signed I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(unsigned I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(long I) {
+ AddInteger((unsigned long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long I) {
+ if (sizeof(long) == sizeof(int))
+ AddInteger(unsigned(I));
+ else if (sizeof(long) == sizeof(long long)) {
+ AddInteger((unsigned long long)I);
+ } else {
+ llvm_unreachable("unexpected sizeof(long)");
+ }
+}
+void FoldingSetNodeID::AddInteger(long long I) {
+ AddInteger((unsigned long long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long long I) {
+ AddInteger(unsigned(I));
+ if ((uint64_t)(unsigned)I != I)
+ Bits.push_back(unsigned(I >> 32));
+}
+
+void FoldingSetNodeID::AddString(StringRef String) {
+ unsigned Size = String.size();
+ Bits.push_back(Size);
+ if (!Size) return;
+
+ unsigned Units = Size / 4;
+ unsigned Pos = 0;
+ const unsigned *Base = (const unsigned*) String.data();
+
+ // If the string is aligned do a bulk transfer.
+ if (!((intptr_t)Base & 3)) {
+ Bits.append(Base, Base + Units);
+ Pos = (Units + 1) * 4;
+ } else {
+ // Otherwise do it the hard way.
+ // To be compatible with above bulk transfer, we need to take endianness
+ // into account.
+ if (sys::isBigEndianHost()) {
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+ ((unsigned char)String[Pos - 3] << 16) |
+ ((unsigned char)String[Pos - 2] << 8) |
+ (unsigned char)String[Pos - 1];
+ Bits.push_back(V);
+ }
+ } else {
+ assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+ ((unsigned char)String[Pos - 2] << 16) |
+ ((unsigned char)String[Pos - 3] << 8) |
+ (unsigned char)String[Pos - 4];
+ Bits.push_back(V);
+ }
+ }
+ }
+
+ // With the leftover bits.
+ unsigned V = 0;
+ // Pos will have overshot size by 4 - #bytes left over.
+ // No need to take endianness into account here - this is always executed.
+ switch (Pos - Size) {
+ case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
+ case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+ case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
+ default: return; // Nothing left.
+ }
+
+ Bits.push_back(V);
+}
+
+// AddNodeID - Adds the Bit data of another ID to *this.
+void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
+ Bits.append(ID.Bits.begin(), ID.Bits.end());
+}
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeID::ComputeHash() const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
+ return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
+}
+
+/// Intern - Copy this node's data to a memory region allocated from the
+/// given allocator and return a FoldingSetNodeIDRef describing the
+/// interned data.
+FoldingSetNodeIDRef
+FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
+ unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
+ std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+ return FoldingSetNodeIDRef(New, Bits.size());
+}
+
+//===----------------------------------------------------------------------===//
+/// Helper functions for FoldingSetImpl.
+
+/// GetNextPtr - In order to save space, each bucket is a
+/// singly-linked-list. In order to make deletion more efficient, we make
+/// the list circular, so we can delete a node without computing its hash.
+/// The problem with this is that the starts of the hash buckets are not
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// use GetBucketPtr when this happens.
+static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+ // The low bit is set if this is the pointer back to the bucket.
+ if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
+ return 0;
+
+ return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+}
+
+
+/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
+/// testing.
+static void **GetBucketPtr(void *NextInBucketPtr) {
+ intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
+ assert((Ptr & 1) && "Not a bucket pointer");
+ return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
+}
+
+/// GetBucketFor - Hash the specified node ID and return the hash bucket for
+/// the specified ID.
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
+ // NumBuckets is always a power of 2.
+ unsigned BucketNum = Hash & (NumBuckets-1);
+ return Buckets + BucketNum;
+}
+
+/// AllocateBuckets - Allocate initialized bucket memory.
+static void **AllocateBuckets(unsigned NumBuckets) {
+ void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+ return Buckets;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetImpl Implementation
+
+FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+ assert(5 < Log2InitSize && Log2InitSize < 32 &&
+ "Initial hash table size out of range");
+ NumBuckets = 1 << Log2InitSize;
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+}
+FoldingSetImpl::~FoldingSetImpl() {
+ free(Buckets);
+}
+void FoldingSetImpl::clear() {
+ // Set all but the last bucket to null pointers.
+ memset(Buckets, 0, NumBuckets*sizeof(void*));
+
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+
+ // Reset the node count to zero.
+ NumNodes = 0;
+}
+
+/// GrowHashTable - Double the size of the hash table and rehash everything.
+///
+void FoldingSetImpl::GrowHashTable() {
+ void **OldBuckets = Buckets;
+ unsigned OldNumBuckets = NumBuckets;
+ NumBuckets <<= 1;
+
+ // Clear out new buckets.
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
+
+ // Walk the old buckets, rehashing nodes into their new place.
+ FoldingSetNodeID TempID;
+ for (unsigned i = 0; i != OldNumBuckets; ++i) {
+ void *Probe = OldBuckets[i];
+ if (!Probe) continue;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ // Figure out the next link, remove NodeInBucket from the old link.
+ Probe = NodeInBucket->getNextInBucket();
+ NodeInBucket->SetNextInBucket(0);
+
+ // Insert the node into the new bucket, after recomputing the hash.
+ InsertNode(NodeInBucket,
+ GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+ Buckets, NumBuckets));
+ TempID.clear();
+ }
+ }
+
+ free(OldBuckets);
+}
+
+/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
+/// return it. If not, return the insertion token that will make insertion
+/// faster.
+FoldingSetImpl::Node
+*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+
+ void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
+ void *Probe = *Bucket;
+
+ InsertPos = 0;
+
+ FoldingSetNodeID TempID;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ if (NodeEquals(NodeInBucket, ID, TempID))
+ return NodeInBucket;
+ TempID.clear();
+
+ Probe = NodeInBucket->getNextInBucket();
+ }
+
+ // Didn't find the node, return null with the bucket as the InsertPos.
+ InsertPos = Bucket;
+ return 0;
+}
+
+/// InsertNode - Insert the specified node into the folding set, knowing that it
+/// is not already in the map. InsertPos must be obtained from
+/// FindNodeOrInsertPos.
+void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+ assert(N->getNextInBucket() == 0);
+ // Do we need to grow the hashtable?
+ if (NumNodes+1 > NumBuckets*2) {
+ GrowHashTable();
+ FoldingSetNodeID TempID;
+ InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
+ }
+
+ ++NumNodes;
+
+ /// The insert position is actually a bucket pointer.
+ void **Bucket = static_cast<void**>(InsertPos);
+
+ void *Next = *Bucket;
+
+ // If this is the first insertion into this bucket, its next pointer will be
+ // null. Pretend as if it pointed to itself, setting the low bit to indicate
+ // that it is a pointer to the bucket.
+ if (Next == 0)
+ Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
+
+ // Set the node's next pointer, and make the bucket point to the node.
+ N->SetNextInBucket(Next);
+ *Bucket = N;
+}
+
+/// RemoveNode - Remove a node from the folding set, returning true if one was
+/// removed or false if the node was not in the folding set.
+bool FoldingSetImpl::RemoveNode(Node *N) {
+ // Because each bucket is a circular list, we don't need to compute N's hash
+ // to remove it.
+ void *Ptr = N->getNextInBucket();
+ if (Ptr == 0) return false; // Not in folding set.
+
+ --NumNodes;
+ N->SetNextInBucket(0);
+
+ // Remember what N originally pointed to, either a bucket or another node.
+ void *NodeNextPtr = Ptr;
+
+ // Chase around the list until we find the node (or bucket) which points to N.
+ while (true) {
+ if (Node *NodeInBucket = GetNextPtr(Ptr)) {
+ // Advance pointer.
+ Ptr = NodeInBucket->getNextInBucket();
+
+ // We found a node that points to N, change it to point to N's next node,
+ // removing N from the list.
+ if (Ptr == N) {
+ NodeInBucket->SetNextInBucket(NodeNextPtr);
+ return true;
+ }
+ } else {
+ void **Bucket = GetBucketPtr(Ptr);
+ Ptr = *Bucket;
+
+ // If we found that the bucket points to N, update the bucket to point to
+ // whatever is next.
+ if (Ptr == N) {
+ *Bucket = NodeNextPtr;
+ return true;
+ }
+ }
+ }
+}
+
+/// GetOrInsertNode - If there is an existing simple Node exactly
+/// equal to the specified node, return it. Otherwise, insert 'N' and return
+/// it instead.
+FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+ FoldingSetNodeID ID;
+ GetNodeProfile(N, ID);
+ void *IP;
+ if (Node *E = FindNodeOrInsertPos(ID, IP))
+ return E;
+ InsertNode(N, IP);
+ return N;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetIteratorImpl Implementation
+
+FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
+ // Skip to the first non-null non-self-cycle bucket.
+ while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+ ++Bucket;
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+}
+
+void FoldingSetIteratorImpl::advance() {
+ // If there is another link within this bucket, go to it.
+ void *Probe = NodePtr->getNextInBucket();
+
+ if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
+ NodePtr = NextNodeInBucket;
+ else {
+ // Otherwise, this is the last link in this bucket.
+ void **Bucket = GetBucketPtr(Probe);
+
+ // Skip to the next non-null non-self-cycle bucket.
+ do {
+ ++Bucket;
+ } while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetBucketIteratorImpl Implementation
+
+FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
+ Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+}
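
The routines above are the non-templated core behind the FoldingSet<T> wrapper declared in llvm/ADT/FoldingSet.h. As a minimal sketch of the usual find-or-insert pattern (assuming that header's interface; MyNode and getOrCreate are hypothetical and not part of this diff):

#include "llvm/ADT/FoldingSet.h"

// Hypothetical client node: profiles a single integer.
struct MyNode : llvm::FoldingSetNode {
  int Value;
  explicit MyNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

static MyNode *getOrCreate(llvm::FoldingSet<MyNode> &Set, int V) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(V);                 // Must mirror MyNode::Profile().
  void *InsertPos = 0;
  if (MyNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
    return Existing;                // Already uniqued.
  MyNode *N = new MyNode(V);        // Caller owns the allocation.
  Set.InsertNode(N, InsertPos);     // Reuses the probe done above.
  return N;
}

The void* token filled in by FindNodeOrInsertPos is simply the bucket pointer computed above, which is why InsertNode can skip a second hash probe.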
diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp
new file mode 100644
index 000000000000..231ae48759e2
--- /dev/null
+++ b/contrib/llvm/lib/Support/FormattedStream.cpp
@@ -0,0 +1,101 @@
+//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of formatted_raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+/// CountColumns - Examine the given char sequence and figure out which
+/// column we end up in after output.
+///
+static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) {
+ // Keep track of the current column by scanning the string for
+ // special characters
+
+ for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+ ++Column;
+ if (*Ptr == '\n' || *Ptr == '\r')
+ Column = 0;
+ else if (*Ptr == '\t')
+ // Assumes tab stop = 8 characters.
+ Column += (8 - (Column & 0x7)) & 0x7;
+ }
+
+ return Column;
+}
+
+/// ComputeColumn - Examine the current output and figure out which
+/// column we end up in after output.
+void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
+ // If our previous scan pointer is inside the buffer, assume we already
+ // scanned those bytes. This depends on raw_ostream to not change our buffer
+ // in unexpected ways.
+ if (Ptr <= Scanned && Scanned <= Ptr + Size) {
+ // Scan all characters added since our last scan to determine the new
+ // column.
+ ColumnScanned = CountColumns(ColumnScanned, Scanned,
+ Size - (Scanned - Ptr));
+ } else
+ ColumnScanned = CountColumns(ColumnScanned, Ptr, Size);
+
+ // Update the scanning pointer.
+ Scanned = Ptr + Size;
+}
+
+/// PadToColumn - Align the output to some column number.
+///
+/// \param NewCol - The column to move to.
+///
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(getBufferStart(), GetNumBytesInBuffer());
+
+ // Output spaces until we reach the desired column.
+ indent(std::max(int(NewCol - ColumnScanned), 1));
+ return *this;
+}
+
+void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ // Figure out what's in the buffer and add it to the column count.
+ ComputeColumn(Ptr, Size);
+
+ // Write the data to the underlying stream (which is unbuffered, so
+ // the data will be immediately written out).
+ TheStream->write(Ptr, Size);
+
+ // Reset the scanning pointer.
+ Scanned = 0;
+}
+
+/// fouts() - This returns a reference to a formatted_raw_ostream for
+/// standard output. Use it like: fouts() << "foo" << "bar";
+formatted_raw_ostream &llvm::fouts() {
+ static formatted_raw_ostream S(outs());
+ return S;
+}
+
+/// ferrs() - This returns a reference to a formatted_raw_ostream for
+/// standard error. Use it like: ferrs() << "foo" << "bar";
+formatted_raw_ostream &llvm::ferrs() {
+ static formatted_raw_ostream S(errs());
+ return S;
+}
+
+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// the debug stream. Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &llvm::fdbgs() {
+ static formatted_raw_ostream S(dbgs());
+ return S;
+}
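
As a small usage sketch of the column tracking implemented above (assuming only the formatted_raw_ostream interface from llvm/Support/FormattedStream.h; printTwoColumns is hypothetical):

#include "llvm/Support/FormattedStream.h"

static void printTwoColumns() {
  llvm::formatted_raw_ostream &OS = llvm::fouts();
  OS << "label:";
  OS.PadToColumn(16);   // Pads with spaces up to column 16 (at least one).
  OS << "value\n";      // '\n' resets the tracked column to zero.
  OS << "x:";
  OS.PadToColumn(16);
  OS << "42\n";
}

Note the std::max(..., 1) in PadToColumn above: a column that has already been passed still receives a single separating space.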
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
new file mode 100644
index 000000000000..0dba28a2530c
--- /dev/null
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -0,0 +1,200 @@
+//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements misc. GraphWriter support routines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+std::string llvm::DOT::EscapeString(const std::string &Label) {
+ std::string Str(Label);
+ for (unsigned i = 0; i != Str.length(); ++i)
+ switch (Str[i]) {
+ case '\n':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i;
+ Str[i] = 'n';
+ break;
+ case '\t':
+ Str.insert(Str.begin()+i, ' '); // Convert to two spaces
+ ++i;
+ Str[i] = ' ';
+ break;
+ case '\\':
+ if (i+1 != Str.length())
+ switch (Str[i+1]) {
+ case 'l': continue; // don't disturb \l
+ case '|': case '{': case '}':
+ Str.erase(Str.begin()+i); continue;
+ default: break;
+ }
+ case '{': case '}':
+ case '<': case '>':
+ case '|': case '"':
+ Str.insert(Str.begin()+i, '\\'); // Escape character...
+ ++i; // don't infinite loop
+ break;
+ }
+ return Str;
+}
+
+
+
+void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
+ GraphProgram::Name program) {
+ std::string ErrMsg;
+#if HAVE_GRAPHVIZ
+ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
+
+ std::vector<const char*> args;
+ args.push_back(Graphviz.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ errs() << "Running 'Graphviz' program... ";
+ if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+
+#elif HAVE_XDOT_PY
+ std::vector<const char*> args;
+ args.push_back(LLVM_PATH_XDOT_PY);
+ args.push_back(Filename.c_str());
+
+ switch (program) {
+ case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
+ case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
+ case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+ case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+ case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+ default: errs() << "Unknown graph layout name; using default.\n";
+ }
+
+ args.push_back(0);
+
+ errs() << "Running 'xdot.py' program... ";
+ if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
+ &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+
+#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
+ HAVE_TWOPI || HAVE_CIRCO))
+ sys::Path PSFilename = Filename;
+ PSFilename.appendSuffix("ps");
+
+ sys::Path prog;
+
+ // Set default grapher
+#if HAVE_CIRCO
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+#if HAVE_TWOPI
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if HAVE_NEATO
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if HAVE_FDP
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if HAVE_DOT
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+
+ // Find which program the user wants
+#if HAVE_DOT
+ if (program == GraphProgram::DOT)
+ prog = sys::Path(LLVM_PATH_DOT);
+#endif
+#if (HAVE_FDP)
+ if (program == GraphProgram::FDP)
+ prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if (HAVE_NEATO)
+ if (program == GraphProgram::NEATO)
+ prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if (HAVE_TWOPI)
+ if (program == GraphProgram::TWOPI)
+ prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if (HAVE_CIRCO)
+ if (program == GraphProgram::CIRCO)
+ prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+
+ std::vector<const char*> args;
+ args.push_back(prog.c_str());
+ args.push_back("-Tps");
+ args.push_back("-Nfontname=Courier");
+ args.push_back("-Gsize=7.5,10");
+ args.push_back(Filename.c_str());
+ args.push_back("-o");
+ args.push_back(PSFilename.c_str());
+ args.push_back(0);
+
+ errs() << "Running '" << prog.str() << "' program... ";
+
+ if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ errs() << " done. \n";
+
+ sys::Path gv(LLVM_PATH_GV);
+ args.clear();
+ args.push_back(gv.c_str());
+ args.push_back(PSFilename.c_str());
+ args.push_back("--spartan");
+ args.push_back(0);
+
+ ErrMsg.clear();
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
+ errs() << "Error: " << ErrMsg << "\n";
+ Filename.eraseFromDisk();
+ PSFilename.eraseFromDisk();
+ }
+ else {
+ sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph files: " << Filename.str() << " "
+ << PSFilename.str() << "\n";
+ }
+#elif HAVE_DOTTY
+ sys::Path dotty(LLVM_PATH_DOTTY);
+
+ std::vector<const char*> args;
+ args.push_back(dotty.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ errs() << "Running 'dotty' program... ";
+ if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ } else {
+// Dotty spawns another app and doesn't wait until it returns
+#if defined (__MINGW32__) || defined (_WINDOWS)
+ return;
+#endif
+ Filename.eraseFromDisk();
+ }
+#endif
+}
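
The escaping rules in DOT::EscapeString are easiest to see on a concrete label. A sketch, assuming only the declarations from llvm/Support/GraphWriter.h and llvm/Support/raw_ostream.h (showEscaping is hypothetical):

#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static void showEscaping() {
  // Per the loop above: '{' '}' '<' '>' '|' '"' gain a backslash, '\n'
  // becomes the two characters "\n", '\t' becomes two spaces, and an
  // existing "\l" is left untouched.
  std::string Label = "entry{1}\tcond|\"x\"\n";
  llvm::errs() << llvm::DOT::EscapeString(Label) << "\n";
}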
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
new file mode 100644
index 000000000000..c525a1228129
--- /dev/null
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -0,0 +1,315 @@
+//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Host concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <string.h>
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Host.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Host.inc"
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//
+// Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+std::string sys::getHostCPUName() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectX86FamilyModel(EAX, Family, Model);
+
+ bool HasSSE3 = (ECX & 0x1);
+ GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ switch (Model) {
+ case 0: // Intel486 DX processors
+ case 1: // Intel486 DX processors
+ case 2: // Intel486 SX processors
+ case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+ // IntelDX2 processors
+ case 4: // Intel486 SL processor
+ case 5: // IntelSX2 processors
+ case 7: // Write-Back Enhanced IntelDX2 processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
+ default: return "i486";
+ }
+ case 5:
+ switch (Model) {
+ case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+ // Pentium processors (60, 66)
+ case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+ // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+ // 150, 166, 200)
+ case 3: // Pentium OverDrive processors for Intel486 processor-based
+ // systems
+ return "pentium";
+
+ case 4: // Pentium OverDrive processor with MMX technology for Pentium
+ // processor (75, 90, 100, 120, 133), Pentium processor with
+ // MMX technology (166, 200)
+ return "pentium-mmx";
+
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: // Pentium Pro processor
+ return "pentiumpro";
+
+ case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
+ // model 03
+ case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
+ // model 05, and Intel Celeron processor, model 05
+ case 6: // Celeron processor, model 06
+ return "pentium2";
+
+ case 7: // Pentium III processor, model 07, and Pentium III Xeon
+ // processor, model 07
+ case 8: // Pentium III processor, model 08, Pentium III Xeon processor,
+ // model 08, and Celeron processor, model 08
+ case 10: // Pentium III Xeon processor, model 0Ah
+ case 11: // Pentium III processor, model 0Bh
+ return "pentium3";
+
+ case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+ case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+ // 0Dh. All processors are manufactured using the 90 nm process.
+ return "pentium-m";
+
+ case 14: // Intel Core Duo processor, Intel Core Solo processor, model
+ // 0Eh. All processors are manufactured using the 65 nm process.
+ return "yonah";
+
+ case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 22: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ return "core2";
+
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
+ return "i686"; // FIXME: ???
+
+ case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ return "penryn";
+
+ case 26: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 37: // Intel Core i7, laptop version.
+ return "corei7";
+
+ // SandyBridge:
+ case 42: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 44: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
+ case 45:
+ return "corei7-avx";
+
+ case 28: // Intel Atom processor. All processors are manufactured using
+ // the 45 nm process
+ return "atom";
+
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+ // model 00h and manufactured using the 0.18 micron process.
+ case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+ // processor MP, and Intel Celeron processor. All processors are
+ // model 01h and manufactured using the 0.18 micron process.
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
+ // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+ // processor, and Mobile Intel Celeron processor. All processors
+ // are model 02h and manufactured using the 0.13 micron process.
+ return (Em64T) ? "x86-64" : "pentium4";
+
+ case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+ // processor. All processors are model 03h and manufactured using
+ // the 90 nm process.
+ case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+ // Pentium D processor, Intel Xeon processor, Intel Xeon
+ // processor MP, Intel Celeron D processor. All processors are
+ // model 04h and manufactured using the 90 nm process.
+ case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+ // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+ // MP, Intel Celeron D processor. All processors are model 06h
+ // and manufactured using the 65 nm process.
+ return (Em64T) ? "nocona" : "prescott";
+
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3)
+ return "k8-sse3";
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ case 16:
+ return "amdfam10";
+ default:
+ return "generic";
+ }
+ }
+ return "generic";
+}
+#else
+std::string sys::getHostCPUName() {
+ return "generic";
+}
+#endif
+
+bool sys::getHostCPUFeatures(StringMap<bool> &Features){
+ return false;
+}
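
A usage sketch for the detection code above, assuming the declaration of sys::getHostCPUName() in llvm/Support/Host.h (reportHostCPU is hypothetical):

#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

static void reportHostCPU() {
  // Falls back to "generic" when CPUID is unavailable or the vendor,
  // family, or model is not recognized by the tables above.
  llvm::errs() << "host cpu: " << llvm::sys::getHostCPUName() << "\n";
}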
diff --git a/contrib/llvm/lib/Support/IncludeFile.cpp b/contrib/llvm/lib/Support/IncludeFile.cpp
new file mode 100644
index 000000000000..5da88261ce53
--- /dev/null
+++ b/contrib/llvm/lib/Support/IncludeFile.cpp
@@ -0,0 +1,20 @@
+//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IncludeFile constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IncludeFile.h"
+
+using namespace llvm;
+
+// This constructor is used to ensure linking of other modules. See the
+// llvm/Support/IncludeFile.h header for details.
+IncludeFile::IncludeFile(const void*) {}
diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp
new file mode 100644
index 000000000000..11344956e4c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntEqClasses.cpp
@@ -0,0 +1,70 @@
+//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+
+using namespace llvm;
+
+void IntEqClasses::grow(unsigned N) {
+ assert(NumClasses == 0 && "grow() called after compress().");
+ EC.reserve(N);
+ while (EC.size() < N)
+ EC.push_back(EC.size());
+}
+
+void IntEqClasses::join(unsigned a, unsigned b) {
+ assert(NumClasses == 0 && "join() called after compress().");
+ unsigned eca = EC[a];
+ unsigned ecb = EC[b];
+ // Update pointers while searching for the leaders, compressing the paths
+ // incrementally. The larger leader will eventually be updated, joining the
+ // classes.
+ while (eca != ecb)
+ if (eca < ecb)
+ EC[b] = eca, b = ecb, ecb = EC[b];
+ else
+ EC[a] = ecb, a = eca, eca = EC[a];
+}
+
+unsigned IntEqClasses::findLeader(unsigned a) const {
+ assert(NumClasses == 0 && "findLeader() called after compress().");
+ while (a != EC[a])
+ a = EC[a];
+ return a;
+}
+
+void IntEqClasses::compress() {
+ if (NumClasses)
+ return;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]];
+}
+
+void IntEqClasses::uncompress() {
+ if (!NumClasses)
+ return;
+ SmallVector<unsigned, 8> Leader;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ if (EC[i] < Leader.size())
+ EC[i] = Leader[EC[i]];
+ else
+ Leader.push_back(EC[i] = i);
+ NumClasses = 0;
+}
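
A short sketch of the intended grow/join/compress life cycle, assuming the IntEqClasses interface from llvm/ADT/IntEqClasses.h (demoIntEqClasses is hypothetical):

#include "llvm/ADT/IntEqClasses.h"
#include <cassert>

static void demoIntEqClasses() {
  llvm::IntEqClasses EC;
  EC.grow(5);        // Classes: {0} {1} {2} {3} {4}
  EC.join(0, 3);     // {0,3} {1} {2} {4}
  EC.join(1, 2);     // {0,3} {1,2} {4}
  EC.compress();     // Renumbers the classes 0..2 and freezes the structure.
  assert(EC[0] == EC[3] && EC[1] == EC[2] && EC[0] != EC[4]);
  assert(EC.getNumClasses() == 3);
}

After compress() only lookups are allowed; uncompress() above rebuilds the leader links so that further join() calls become legal again.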
diff --git a/contrib/llvm/lib/Support/IntervalMap.cpp b/contrib/llvm/lib/Support/IntervalMap.cpp
new file mode 100644
index 000000000000..4dfcc404ca42
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntervalMap.cpp
@@ -0,0 +1,161 @@
+//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the few non-templated functions in IntervalMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+
+namespace llvm {
+namespace IntervalMapImpl {
+
+void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) {
+ assert(!path.empty() && "Can't replace missing root");
+ path.front() = Entry(Root, Size, Offsets.first);
+ path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second));
+}
+
+NodeRef Path::getLeftSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go left.
+ unsigned l = Level - 1;
+ while (l && path[l].offset == 0)
+ --l;
+
+ // We can't go left.
+ if (path[l].offset == 0)
+ return NodeRef();
+
+ // NR is the subtree containing our left sibling.
+ NodeRef NR = path[l].subtree(path[l].offset - 1);
+
+ // Keep right all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(NR.size() - 1);
+ return NR;
+}
+
+void Path::moveLeft(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go left.
+ unsigned l = 0;
+ if (valid()) {
+ l = Level - 1;
+ while (path[l].offset == 0) {
+ assert(l != 0 && "Cannot move beyond begin()");
+ --l;
+ }
+ } else if (height() < Level)
+ // end() may have created a height=0 path.
+ path.resize(Level + 1, Entry(0, 0, 0));
+
+ // NR is the subtree containing our left sibling.
+ --path[l].offset;
+ NodeRef NR = subtree(l);
+
+ // Get the rightmost node in the subtree.
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, NR.size() - 1);
+ NR = NR.subtree(NR.size() - 1);
+ }
+ path[l] = Entry(NR, NR.size() - 1);
+}
+
+NodeRef Path::getRightSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // We can't go right.
+ if (atLastEntry(l))
+ return NodeRef();
+
+ // NR is the subtree containing our right sibling.
+ NodeRef NR = path[l].subtree(path[l].offset + 1);
+
+ // Keep left all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(0);
+ return NR;
+}
+
+void Path::moveRight(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // NR is the subtree containing our right sibling. If we hit end(), we have
+ // offset(0) == node(0).size().
+ if (++path[l].offset == path[l].size)
+ return;
+ NodeRef NR = subtree(l);
+
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, 0);
+ NR = NR.subtree(0);
+ }
+ path[l] = Entry(NR, 0);
+}
+
+
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+ const unsigned *CurSize, unsigned NewSize[],
+ unsigned Position, bool Grow) {
+ assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
+ assert(Position <= Elements && "Invalid position");
+ if (!Nodes)
+ return IdxPair();
+
+ // Trivial algorithm: left-leaning even distribution.
+ const unsigned PerNode = (Elements + Grow) / Nodes;
+ const unsigned Extra = (Elements + Grow) % Nodes;
+ IdxPair PosPair = IdxPair(Nodes, 0);
+ unsigned Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ Sum += NewSize[n] = PerNode + (n < Extra);
+ if (PosPair.first == Nodes && Sum > Position)
+ PosPair = IdxPair(n, Position - (Sum - NewSize[n]));
+ }
+ assert(Sum == Elements + Grow && "Bad distribution sum");
+
+ // Subtract the Grow element that was added.
+ if (Grow) {
+ assert(PosPair.first < Nodes && "Bad algebra");
+ assert(NewSize[PosPair.first] && "Too few elements to need Grow");
+ --NewSize[PosPair.first];
+ }
+
+#ifndef NDEBUG
+ Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ assert(NewSize[n] <= Capacity && "Overallocated node");
+ Sum += NewSize[n];
+ }
+ assert(Sum == Elements && "Bad distribution sum");
+#endif
+
+ return PosPair;
+}
+
+} // namespace IntervalMapImpl
+} // namespace llvm
+
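A worked call of the left-leaning distribution above, assuming IntervalMapImpl::distribute() and IdxPair as declared in llvm/ADT/IntervalMap.h; the concrete sizes are illustrative only:

#include "llvm/ADT/IntervalMap.h"
#include <cassert>

static void demoDistribute() {
  // Three nodes of capacity 8 currently hold 10 elements; we want room for
  // one more (Grow) at position 4.
  const unsigned CurSize[3] = {8, 2, 0}; // Not consulted by this strategy.
  unsigned NewSize[3];
  llvm::IntervalMapImpl::IdxPair P =
      llvm::IntervalMapImpl::distribute(3, 10, 8, CurSize, NewSize,
                                        4, /*Grow=*/true);
  // 11 slots split left-leaning as {4,4,3}; the extra slot is then taken
  // back from the node that receives position 4, giving {4,3,3}.
  assert(NewSize[0] == 4 && NewSize[1] == 3 && NewSize[2] == 3);
  assert(P.first == 1 && P.second == 0); // New element lands in node 1.
  (void)P;
}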
diff --git a/contrib/llvm/lib/Support/IsInf.cpp b/contrib/llvm/lib/Support/IsInf.cpp
new file mode 100644
index 000000000000..d6da0c99e8d8
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsInf.cpp
@@ -0,0 +1,49 @@
+//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isinf()
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISINF_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISINF_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISINF_IN_CMATH
+# include <cmath>
+using std::isinf;
+#elif HAVE_FINITE_IN_IEEEFP_H
+// A handy workaround I found at http://www.unixguide.net/sun/faq ...
+// apparently this has been a problem with Solaris for years.
+# include <ieeefp.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isinf(X) (!_finite(X))
+#elif defined(_AIX) && defined(__GNUC__)
+// GCC's fixincludes seems to be removing the isinf() declaration from the
+// system header /usr/include/math.h
+# include <math.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(__hpux)
+// HP-UX is "special"
+#include <math.h>
+static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
+#else
+# error "Don't know how to get isinf()"
+#endif
+
+namespace llvm {
+
+int IsInf(float f) { return isinf(f); }
+int IsInf(double d) { return isinf(d); }
+
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/IsNAN.cpp b/contrib/llvm/lib/Support/IsNAN.cpp
new file mode 100644
index 000000000000..bdfdfbf3155d
--- /dev/null
+++ b/contrib/llvm/lib/Support/IsNAN.cpp
@@ -0,0 +1,33 @@
+//===-- IsNAN.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isnan().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISNAN_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISNAN_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISNAN_IN_CMATH
+# include <cmath>
+using std::isnan;
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isnan _isnan
+#else
+# error "Don't know how to get isnan()"
+#endif
+
+namespace llvm {
+ int IsNAN(float f) { return isnan(f); }
+ int IsNAN(double d) { return isnan(d); }
+} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp
new file mode 100644
index 000000000000..c767c15e71c9
--- /dev/null
+++ b/contrib/llvm/lib/Support/ManagedStatic.cpp
@@ -0,0 +1,75 @@
+//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ManagedStatic class and llvm_shutdown().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Atomic.h"
+#include <cassert>
+using namespace llvm;
+
+static const ManagedStaticBase *StaticList = 0;
+
+void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
+ void (*Deleter)(void*)) const {
+ if (llvm_is_multithreaded()) {
+ llvm_acquire_global_lock();
+
+ if (Ptr == 0) {
+ void* tmp = Creator ? Creator() : 0;
+
+ sys::MemoryFence();
+ Ptr = tmp;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+
+ llvm_release_global_lock();
+ } else {
+ assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+ "Partially initialized ManagedStatic!?");
+ Ptr = Creator ? Creator() : 0;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+}
+
+void ManagedStaticBase::destroy() const {
+ assert(DeleterFn && "ManagedStatic not initialized correctly!");
+ assert(StaticList == this &&
+ "Not destroyed in reverse order of construction?");
+ // Unlink from list.
+ StaticList = Next;
+ Next = 0;
+
+ // Destroy memory.
+ DeleterFn(Ptr);
+
+ // Cleanup.
+ Ptr = 0;
+ DeleterFn = 0;
+}
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm::llvm_shutdown() {
+ while (StaticList)
+ StaticList->destroy();
+
+ if (llvm_is_multithreaded()) llvm_stop_multithreaded();
+}
+
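A sketch of the lazy-construction pattern that the registration code above services, assuming the ManagedStatic<T> template from llvm/Support/ManagedStatic.h; Registry and the helper functions are hypothetical:

#include "llvm/Support/ManagedStatic.h"
#include <string>
#include <vector>

struct Registry { std::vector<std::string> Names; };

// Nothing is constructed until the first dereference.
static llvm::ManagedStatic<Registry> TheRegistry;

void addName(const std::string &S) {
  TheRegistry->Names.push_back(S); // First use calls RegisterManagedStatic.
}

void shutdownExample() {
  llvm::llvm_shutdown();           // Destroys managed statics in reverse order.
}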
diff --git a/contrib/llvm/lib/Support/Memory.cpp b/contrib/llvm/lib/Support/Memory.cpp
new file mode 100644
index 000000000000..ac7af0ae8bdf
--- /dev/null
+++ b/contrib/llvm/lib/Support/Memory.cpp
@@ -0,0 +1,74 @@
+//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for allocating memory and dealing
+// with memory-mapped files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Memory.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Memory.inc"
+#endif
+
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted, it must invalidate the instruction cache on some
+/// platforms.
+void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
+ size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+ sys_icache_invalidate(Addr, Len);
+# endif
+
+#else
+
+# if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__)
+ // FIXME: Can we safely always call this for __GNUC__ everywhere?
+ char *Start = (char*) Addr;
+ char *End = Start + Len;
+ __clear_cache(Start, End);
+# endif
+
+#endif // end apple
+
+ ValgrindDiscardTranslations(Addr, Len);
+}
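
The PowerPC branch above rounds the invalidated range out to 32-byte cache lines before issuing dcbf/icbi. A standalone sketch of that rounding arithmetic, with illustrative addresses:

#include <stdint.h>
#include <cassert>

static void demoLineRounding() {
  const intptr_t LineSize = 32;
  const intptr_t Mask = ~(LineSize - 1);
  intptr_t Addr = 0x1004, Len = 8;
  intptr_t StartLine = Addr & Mask;                      // 0x1000
  intptr_t EndLine = (Addr + Len + LineSize - 1) & Mask; // 0x1020
  assert(StartLine == 0x1000 && EndLine == 0x1020);
  // The loops above would touch each 32-byte line in [StartLine, EndLine),
  // i.e. exactly one line for this 8-byte range.
}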
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
new file mode 100644
index 000000000000..d264be9aced1
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -0,0 +1,376 @@
+//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <new>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#include <sys/uio.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+namespace { const llvm::error_code success; }
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+MemoryBuffer::~MemoryBuffer() { }
+
+/// init - Initialize this MemoryBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
+ bool RequiresNullTerminator) {
+ assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
+ "Buffer is not null terminated!");
+ BufferStart = BufStart;
+ BufferEnd = BufEnd;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+/// CopyStringRef - Copies contents of a StringRef into a block of memory and
+/// null-terminates it.
+static void CopyStringRef(char *Memory, StringRef Data) {
+ memcpy(Memory, Data.data(), Data.size());
+ Memory[Data.size()] = 0; // Null terminate string.
+}
+
+/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
+template <typename T>
+static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
+ bool RequiresNullTerminator) {
+ char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
+ CopyStringRef(Mem + sizeof(T), Name);
+ return new (Mem) T(Buffer, RequiresNullTerminator);
+}
+
+namespace {
+/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
+class MemoryBufferMem : public MemoryBuffer {
+public:
+ MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
+ init(InputData.begin(), InputData.end(), RequiresNullTerminator);
+ }
+
+ virtual const char *getBufferIdentifier() const {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char*>(this + 1);
+ }
+
+ virtual BufferKind getBufferKind() const {
+ return MemoryBuffer_Malloc;
+ }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
+/// that InputData must be null terminated if RequiresNullTerminator is true!
+MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
+ StringRef BufferName,
+ bool RequiresNullTerminator) {
+ return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
+ RequiresNullTerminator);
+}
+
+/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+/// copying the contents and taking ownership of it. This has no requirements
+/// on EndPtr[0].
+MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
+ StringRef BufferName) {
+ MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
+ if (!Buf) return 0;
+ memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
+ InputData.size());
+ return Buf;
+}
+
+/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+/// that is not initialized. Note that the caller should initialize the
+/// memory allocated by this method. The memory is owned by the MemoryBuffer
+/// object.
+MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
+ StringRef BufferName) {
+ // Allocate space for the MemoryBuffer, the data and the name. It is important
+ // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+ size_t AlignedStringLen =
+ RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
+ sizeof(void*)); // TODO: Is sizeof(void*) enough?
+ size_t RealLen = AlignedStringLen + Size + 1;
+ char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
+ if (!Mem) return 0;
+
+ // The name is stored after the class itself.
+ CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
+
+ // The buffer begins after the name and must be aligned.
+ char *Buf = Mem + AlignedStringLen;
+ Buf[Size] = 0; // Null terminate buffer.
+
+ return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
+}
+
+/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+/// is completely initialized to zeros. Note that the caller need not
+/// initialize the memory; this method zero-fills it. The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
+ MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
+ if (!SB) return 0;
+ memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
+ return SB;
+}
+
+
+/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+/// if the Filename is "-". If an error occurs, this returns null and fills
+/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
+/// returns an empty buffer.
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (Filename == "-")
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ if (strcmp(Filename, "-") == 0)
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MemoryBufferMMapFile - This represents a file that was mapped in with the
+/// sys::Path::MapInFilePages method. When destroyed, it calls the
+/// sys::Path::UnMapFilePages method.
+class MemoryBufferMMapFile : public MemoryBufferMem {
+public:
+ MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
+ : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+
+ ~MemoryBufferMMapFile() {
+ static int PageSize = sys::Process::GetPageSize();
+
+ uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
+ size_t Size = getBufferSize();
+ uintptr_t RealStart = Start & ~(PageSize - 1);
+ size_t RealSize = Size + (Start - RealStart);
+
+ sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
+ RealSize);
+ }
+
+ virtual BufferKind getBufferKind() const {
+ return MemoryBuffer_MMap;
+ }
+};
+}
+
+error_code MemoryBuffer::getFile(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ // Ensure the path is null terminated.
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize,
+ RequiresNullTerminator);
+}
+
+error_code MemoryBuffer::getFile(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize,
+ bool RequiresNullTerminator) {
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ int FD = ::open(Filename, OpenFlags);
+ if (FD == -1)
+ return error_code(errno, posix_category());
+
+ error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize,
+ 0, RequiresNullTerminator);
+ close(FD);
+ return ret;
+}
+
+static bool shouldUseMmap(int FD,
+ size_t FileSize,
+ size_t MapSize,
+ off_t Offset,
+ bool RequiresNullTerminator,
+ int PageSize) {
+ // We don't use mmap for small files because this can severely fragment our
+ // address space.
+ if (MapSize < 4096*4)
+ return false;
+
+ if (!RequiresNullTerminator)
+ return true;
+
+
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ // FIXME: this chunk of code is duplicated, but it avoids a fstat when
+ // RequiresNullTerminator = false and MapSize != -1.
+ if (FileSize == size_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+ FileSize = FileInfo.st_size;
+ }
+
+ // If we need a null terminator and the end of the map is inside the file,
+ // we cannot use mmap.
+ size_t End = Offset + MapSize;
+ assert(End <= FileSize);
+ if (End != FileSize)
+ return false;
+
+ // Don't try to map files that are exactly a multiple of the system page size
+ // if we need a null terminator.
+ if ((FileSize & (PageSize -1)) == 0)
+ return false;
+
+ return true;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ size_t FileSize, size_t MapSize,
+ off_t Offset,
+ bool RequiresNullTerminator) {
+ static int PageSize = sys::Process::GetPageSize();
+
+ // Default is to map the full file.
+ if (MapSize == size_t(-1)) {
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ if (FileSize == size_t(-1)) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
+ }
+ FileSize = FileInfo.st_size;
+ }
+ MapSize = FileSize;
+ }
+
+ if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
+ PageSize)) {
+ off_t RealMapOffset = Offset & ~(PageSize - 1);
+ off_t Delta = Offset - RealMapOffset;
+ size_t RealMapSize = MapSize + Delta;
+
+ if (const char *Pages = sys::Path::MapInFilePages(FD,
+ RealMapSize,
+ RealMapOffset)) {
+ result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
+ StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ return success;
+ }
+ }
+
+ MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
+ if (!Buf) {
+ // Failed to create a buffer. The only way it can fail is if
+ // new(std::nothrow) returns 0.
+ return make_error_code(errc::not_enough_memory);
+ }
+
+ OwningPtr<MemoryBuffer> SB(Buf);
+ char *BufPtr = const_cast<char*>(SB->getBufferStart());
+
+ size_t BytesLeft = MapSize;
+ if (lseek(FD, Offset, SEEK_SET) == -1)
+ return error_code(errno, posix_category());
+
+ while (BytesLeft) {
+ ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+ if (NumRead == -1) {
+ if (errno == EINTR)
+ continue;
+ // Error while reading.
+ return error_code(errno, posix_category());
+ } else if (NumRead == 0) {
+ // We hit EOF early, truncate and terminate buffer.
+ Buf->BufferEnd = BufPtr;
+ *BufPtr = 0;
+ result.swap(SB);
+ return success;
+ }
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
+ }
+
+ result.swap(SB);
+ return success;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
+ // Read in all of the data from stdin, we cannot mmap stdin.
+ //
+ // FIXME: That isn't necessarily true, we should try to mmap stdin and
+ // fallback if it fails.
+ sys::Program::ChangeStdinToBinary();
+
+ const ssize_t ChunkSize = 4096*4;
+ SmallString<ChunkSize> Buffer;
+ ssize_t ReadBytes;
+ // Read into Buffer until we hit EOF.
+ do {
+ Buffer.reserve(Buffer.size() + ChunkSize);
+ ReadBytes = read(0, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1) {
+ if (errno == EINTR) continue;
+ return error_code(errno, posix_category());
+ }
+ Buffer.set_size(Buffer.size() + ReadBytes);
+ } while (ReadBytes != 0);
+
+ result.reset(getMemBufferCopy(Buffer, "<stdin>"));
+ return success;
+}
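
A caller-side sketch for the getFile/getFileOrSTDIN entry points above, assuming error_code and OwningPtr behave as declared in the headers this file already includes; dumpFileSize is a hypothetical helper:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"

static bool dumpFileSize(const char *Path) {
  llvm::OwningPtr<llvm::MemoryBuffer> Buf;
  if (llvm::error_code EC = llvm::MemoryBuffer::getFileOrSTDIN(Path, Buf)) {
    llvm::errs() << Path << ": " << EC.message() << "\n";
    return false;
  }
  llvm::errs() << Path << ": " << Buf->getBufferSize() << " bytes\n";
  return true;
}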
diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp
new file mode 100644
index 000000000000..91e3ecd23a2e
--- /dev/null
+++ b/contrib/llvm/lib/Support/MemoryObject.cpp
@@ -0,0 +1,34 @@
+//===- MemoryObject.cpp - Abstract memory interface -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryObject.h"
+using namespace llvm;
+
+MemoryObject::~MemoryObject() {
+}
+
+int MemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ uint64_t current = address;
+ uint64_t limit = getBase() + getExtent();
+
+ while (current - address < size && current < limit) {
+ if (readByte(current, &buf[(current - address)]))
+ return -1;
+
+ current++;
+ }
+
+ if (copied)
+ *copied = current - address;
+
+ return 0;
+}
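
readBytes() above is written against the pure virtual readByte()/getBase()/getExtent() interface. A minimal concrete subclass, assuming the MemoryObject declaration in llvm/Support/MemoryObject.h; ArrayMemoryObject is hypothetical:

#include "llvm/Support/MemoryObject.h"

class ArrayMemoryObject : public llvm::MemoryObject {
  const uint8_t *Data;
  uint64_t Size;
public:
  ArrayMemoryObject(const uint8_t *D, uint64_t S) : Data(D), Size(S) {}
  virtual uint64_t getBase() const { return 0; }
  virtual uint64_t getExtent() const { return Size; }
  virtual int readByte(uint64_t Addr, uint8_t *Byte) const {
    if (Addr >= Size)
      return -1;            // Out of range: readBytes() above stops and fails.
    *Byte = Data[Addr];
    return 0;
  }
};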
diff --git a/contrib/llvm/lib/Support/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp
new file mode 100644
index 000000000000..b408973bbad1
--- /dev/null
+++ b/contrib/llvm/lib/Support/Mutex.cpp
@@ -0,0 +1,157 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_mutex_init does have an address, then mutex support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_mutex_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a Mutex using pthread calls
+MutexImpl::MutexImpl( bool recursive)
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0);
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
+#endif
+
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = mutex;
+ }
+}
+
+// Destruct a Mutex
+MutexImpl::~MutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
+ }
+}
+
+bool
+MutexImpl::acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+MutexImpl::release()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
+ } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
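
A usage sketch of the mutex built above, assuming the sys::Mutex wrapper declared in llvm/Support/Mutex.h forwards to MutexImpl::acquire()/release() as the rest of this header family does:

#include "llvm/Support/Mutex.h"

static llvm::sys::Mutex CounterLock; // Recursive by default.
static unsigned Counter;

unsigned bumpCounter() {
  CounterLock.acquire();             // pthread_mutex_lock under the hood.
  unsigned V = ++Counter;
  CounterLock.release();             // pthread_mutex_unlock.
  return V;
}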
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
new file mode 100644
index 000000000000..8fbaf2d42bf9
--- /dev/null
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -0,0 +1,303 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Path.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+namespace {
+using support::ulittle32_t;
+}
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+ return path == that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+ return path < that.path;
+}
+
+Path
+Path::GetLLVMConfigDir() {
+ Path result;
+#ifdef LLVM_ETCDIR
+ if (result.set(LLVM_ETCDIR))
+ return result;
+#endif
+ return GetLLVMDefaultConfigDir();
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+ assert(magic && "Invalid magic number string");
+ assert(length >=4 && "Invalid magic number length");
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return Bitcode_FileType;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return Bitcode_FileType;
+ break;
+ case '!':
+ if (length >= 8)
+ if (memcmp(magic,"!<arch>\n",8) == 0)
+ return Archive_FileType;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (length >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return ELF_Relocatable_FileType;
+ case 2: return ELF_Executable_FileType;
+ case 3: return ELF_SharedObject_FileType;
+ case 4: return ELF_Core_FileType;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (length >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (length >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (length >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return Mach_O_Object_FileType;
+ case 2: return Mach_O_Executable_FileType;
+ case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+ case 4: return Mach_O_Core_FileType;
+ case 5: return Mach_O_PreloadExecutable_FileType;
+ case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ case 7: return Mach_O_DynamicLinker_FileType;
+ case 8: return Mach_O_Bundle_FileType;
+ case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+ case 10: break; // FIXME: MH_DSYM companion file with only debug.
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return COFF_FileType;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return COFF_FileType;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off = *reinterpret_cast<const ulittle32_t *>(magic + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < length && memcmp(magic + off, "PE\0\0",4) == 0)
+ return COFF_FileType;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return COFF_FileType;
+ break;
+
+ default:
+ break;
+ }
+ return Unknown_FileType;
+}
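
For illustration, a tiny self-check of the bitcode branch above, assuming the LLVMFileType enumerators are visible through llvm/Support/Path.h as the declaration of IdentifyFileType implies.

    #include "llvm/Support/Path.h"
    #include <cassert>
    using namespace llvm;
    using namespace llvm::sys;

    void checkBitcodeMagic() {
      // Raw bitcode signature: 'B', 'C', 0xC0, 0xDE.
      const char Magic[4] = { 'B', 'C', (char)0xC0, (char)0xDE };
      assert(IdentifyFileType(Magic, 4) == Bitcode_FileType);
    }
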
+
+bool
+Path::isArchive() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == Archive_FileType;
+}
+
+bool
+Path::isDynamicLibrary() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ switch (type) {
+ default: return false;
+ case Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case ELF_SharedObject_FileType:
+ case COFF_FileType: return true;
+ }
+}
+
+bool
+Path::isObjectFile() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ return false;
+ return true;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+ std::vector<sys::Path> LibPaths;
+ GetSystemLibraryPaths(LibPaths);
+ for (unsigned i = 0; i < LibPaths.size(); ++i) {
+ sys::Path FullPath(LibPaths[i]);
+ FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+ if (FullPath.isDynamicLibrary())
+ return FullPath;
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+ }
+ return sys::Path();
+}
+
+StringRef Path::GetDLLSuffix() {
+ return &(LTDL_SHLIB_EXT[1]);
+}
+
+void
+Path::appendSuffix(StringRef suffix) {
+ if (!suffix.empty()) {
+ path.append(".");
+ path.append(suffix);
+ }
+}
+
+bool
+Path::isBitcodeFile() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == Bitcode_FileType;
+}
+
+bool Path::hasMagicNumber(StringRef Magic) const {
+ std::string actualMagic;
+ if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+ return Magic == actualMagic;
+ return false;
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+ const char* at = path;
+ const char* delim = strchr(at, PathSeparator);
+ Path tmpPath;
+ while (delim != 0) {
+ std::string tmp(at, size_t(delim-at));
+ if (tmpPath.set(tmp))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ at = delim + 1;
+ delim = strchr(at, PathSeparator);
+ }
+
+ if (*at != 0)
+ if (tmpPath.set(std::string(at)))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+}
+
+static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
+ assert(Sep[0] != '\0' && Sep[1] == '\0' &&
+ "Sep must be a 1-character string literal.");
+ if (path.empty())
+ return ".";
+
+ // If the path is all slashes, return a single slash.
+ // Otherwise, remove all trailing slashes.
+
+ signed pos = static_cast<signed>(path.size()) - 1;
+
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ // Any slashes left?
+ signed i = 0;
+
+ while (i < pos && path[i] != Sep[0])
+ ++i;
+
+ if (i == pos) // No slashes? Return "."
+ return ".";
+
+ // There is at least one slash left. Remove all trailing non-slashes.
+ while (pos >= 0 && path[pos] != Sep[0])
+ --pos;
+
+ // Remove any trailing slashes.
+ while (pos >= 0 && path[pos] == Sep[0])
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep[0] ? Sep : ".";
+
+ return path.substr(0, pos+1);
+}
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/Path.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp
new file mode 100644
index 000000000000..896c94c071bc
--- /dev/null
+++ b/contrib/llvm/lib/Support/PathV2.cpp
@@ -0,0 +1,774 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+
+namespace {
+ using llvm::StringRef;
+ using llvm::sys::path::is_separator;
+
+#ifdef LLVM_ON_WIN32
+ const StringRef separators = "\\/";
+ const char prefered_separator = '\\';
+#else
+ const StringRef separators = "/";
+ const char prefered_separator = '/';
+#endif
+
+ const llvm::error_code success;
+
+ StringRef find_first_component(StringRef path) {
+ // Look for this first component in the following order.
+ // * empty (in this case we return an empty string)
+ // * either C: or {//,\\}net.
+ // * {/,\}
+ // * {.,..}
+ // * {file,directory}name
+
+ if (path.empty())
+ return path;
+
+#ifdef LLVM_ON_WIN32
+ // C:
+ if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':')
+ return path.substr(0, 2);
+#endif
+
+ // //net
+ if ((path.size() > 2) &&
+ is_separator(path[0]) &&
+ path[0] == path[1] &&
+ !is_separator(path[2])) {
+ // Find the next directory separator.
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ // {/,\}
+ if (is_separator(path[0]))
+ return path.substr(0, 1);
+
+ if (path.startswith(".."))
+ return path.substr(0, 2);
+
+ if (path[0] == '.')
+ return path.substr(0, 1);
+
+ // * {file,directory}name
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ size_t filename_pos(StringRef str) {
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return 0;
+
+ if (str.size() > 0 && is_separator(str[str.size() - 1]))
+ return str.size() - 1;
+
+ size_t pos = str.find_last_of(separators, str.size() - 1);
+
+#ifdef LLVM_ON_WIN32
+ if (pos == StringRef::npos)
+ pos = str.find_last_of(':', str.size() - 2);
+#endif
+
+ if (pos == StringRef::npos ||
+ (pos == 1 && is_separator(str[0])))
+ return 0;
+
+ return pos + 1;
+ }
+
+ size_t root_dir_start(StringRef str) {
+ // case "c:/"
+#ifdef LLVM_ON_WIN32
+ if (str.size() > 2 &&
+ str[1] == ':' &&
+ is_separator(str[2]))
+ return 2;
+#endif
+
+ // case "//"
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return StringRef::npos;
+
+ // case "//net"
+ if (str.size() > 3 &&
+ is_separator(str[0]) &&
+ str[0] == str[1] &&
+ !is_separator(str[2])) {
+ return str.find_first_of(separators, 2);
+ }
+
+ // case "/"
+ if (str.size() > 0 && is_separator(str[0]))
+ return 0;
+
+ return StringRef::npos;
+ }
+
+ size_t parent_path_end(StringRef path) {
+ size_t end_pos = filename_pos(path);
+
+ bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+
+ // Skip separators except for root dir.
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(path[end_pos - 1]))
+ --end_pos;
+
+ if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
+ return StringRef::npos;
+
+ return end_pos;
+ }
+} // end unnamed namespace
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+const_iterator begin(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Component = find_first_component(path);
+ i.Position = 0;
+ return i;
+}
+
+const_iterator end(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Position = path.size();
+ return i;
+}
+
+const_iterator &const_iterator::operator++() {
+ assert(Position < Path.size() && "Tried to increment past end!");
+
+ // Increment Position to past the current component
+ Position += Component.size();
+
+ // Check for end.
+ if (Position == Path.size()) {
+ Component = StringRef();
+ return *this;
+ }
+
+ // Both POSIX and Windows treat paths that begin with exactly two separators
+ // specially.
+ bool was_net = Component.size() > 2 &&
+ is_separator(Component[0]) &&
+ Component[1] == Component[0] &&
+ !is_separator(Component[2]);
+
+ // Handle separators.
+ if (is_separator(Path[Position])) {
+ // Root dir.
+ if (was_net
+#ifdef LLVM_ON_WIN32
+ // c:/
+ || Component.endswith(":")
+#endif
+ ) {
+ Component = Path.substr(Position, 1);
+ return *this;
+ }
+
+ // Skip extra separators.
+ while (Position != Path.size() &&
+ is_separator(Path[Position])) {
+ ++Position;
+ }
+
+ // Treat trailing '/' as a '.'.
+ if (Position == Path.size()) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+ }
+
+ // Find next component.
+ size_t end_pos = Path.find_first_of(separators, Position);
+ Component = Path.slice(Position, end_pos);
+
+ return *this;
+}
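
A short sketch of the iteration behaviour implemented above: redundant separators collapse, a leading '/' is its own component, and a trailing '/' comes back as '.'. The function name is illustrative only.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void dumpComponents(StringRef p) {
      // For p == "/tmp//x/" this prints: "/", "tmp", "x", ".".
      for (sys::path::const_iterator I = sys::path::begin(p),
                                     E = sys::path::end(p);
           I != E; ++I)
        errs() << *I << "\n";
    }
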
+
+const_iterator &const_iterator::operator--() {
+ // If we're at the end and the previous char was a '/', return '.'.
+ if (Position == Path.size() &&
+ Path.size() > 1 &&
+ is_separator(Path[Position - 1])
+#ifdef LLVM_ON_WIN32
+ && Path[Position - 2] != ':'
+#endif
+ ) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+
+ // Skip separators unless it's the root directory.
+ size_t root_dir_pos = root_dir_start(Path);
+ size_t end_pos = Position;
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(Path[end_pos - 1]))
+ --end_pos;
+
+ // Find next separator.
+ size_t start_pos = filename_pos(Path.substr(0, end_pos));
+ Component = Path.slice(start_pos, end_pos);
+ Position = start_pos;
+ return *this;
+}
+
+bool const_iterator::operator==(const const_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() &&
+ Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+ return !(*this == RHS);
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+ return Position - RHS.Position;
+}
+
+const StringRef root_path(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ if ((++pos != e) && is_separator((*pos)[0])) {
+ // {C:/,//net/}, so get the first two components.
+ return path.substr(0, b->size() + pos->size());
+ } else {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // POSIX style root directory.
+ if (is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ return StringRef();
+}
+
+const StringRef root_name(StringRef path) {
+ const_iterator b = begin(path),
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // No path or no name.
+ return StringRef();
+}
+
+const StringRef root_directory(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if ((has_net || has_drive) &&
+ // {C:,//net}, skip to the next component.
+ (++pos != e) && is_separator((*pos)[0])) {
+ return *pos;
+ }
+
+ // POSIX style root directory.
+ if (!has_net && is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ // No path or no root.
+ return StringRef();
+}
+
+const StringRef relative_path(StringRef path) {
+ StringRef root = root_path(path);
+ return path.substr(root.size());
+}
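
A worked sketch of the root queries above, POSIX separators assumed; the values in the comments follow directly from the code.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;
    using namespace llvm::sys;

    void splitRoots() {
      // "/net/foo":  root_path "/",      root_name "",      root_directory "/"
      // "//net/foo": root_path "//net/", root_name "//net", root_directory "/"
      StringRef rp = path::root_path("//net/foo");       // "//net/"
      StringRef rn = path::root_name("//net/foo");       // "//net"
      StringRef rd = path::root_directory("//net/foo");  // "/"
      (void)rp; (void)rn; (void)rd;
    }
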
+
+void append(SmallVectorImpl<char> &path, const Twine &a,
+ const Twine &b,
+ const Twine &c,
+ const Twine &d) {
+ SmallString<32> a_storage;
+ SmallString<32> b_storage;
+ SmallString<32> c_storage;
+ SmallString<32> d_storage;
+
+ SmallVector<StringRef, 4> components;
+ if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+ if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+ if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+ if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+ for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+ e = components.end();
+ i != e; ++i) {
+ bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+ bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+ bool is_root_name = has_root_name(*i);
+
+ if (path_has_sep) {
+ // Strip separators from beginning of component.
+ size_t loc = i->find_first_not_of(separators);
+ StringRef c = i->substr(loc);
+
+ // Append it.
+ path.append(c.begin(), c.end());
+ continue;
+ }
+
+ if (!component_has_sep && !(path.empty() || is_root_name)) {
+ // Add a separator.
+ path.push_back(prefered_separator);
+ }
+
+ path.append(i->begin(), i->end());
+ }
+}
+
+void append(SmallVectorImpl<char> &path,
+ const_iterator begin, const_iterator end) {
+ for (; begin != end; ++begin)
+ path::append(path, *begin);
+}
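
A minimal sketch of the variadic append above; a separator is inserted only where one is not already present. The path contents are illustrative.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/PathV2.h"
    using namespace llvm;

    void buildPath() {
      SmallString<128> P;
      // Yields "/tmp/cache/index.bin" with POSIX separators.
      sys::path::append(P, "/tmp", "cache", "index.bin");
    }
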
+
+const StringRef parent_path(StringRef path) {
+ size_t end_pos = parent_path_end(path);
+ if (end_pos == StringRef::npos)
+ return StringRef();
+ else
+ return path.substr(0, end_pos);
+}
+
+void remove_filename(SmallVectorImpl<char> &path) {
+ size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+ if (end_pos != StringRef::npos)
+ path.set_size(end_pos);
+}
+
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+ StringRef p(path.begin(), path.size());
+ SmallString<32> ext_storage;
+ StringRef ext = extension.toStringRef(ext_storage);
+
+ // Erase existing extension.
+ size_t pos = p.find_last_of('.');
+ if (pos != StringRef::npos && pos >= filename_pos(p))
+ path.set_size(pos);
+
+ // Append '.' if needed.
+ if (ext.size() > 0 && ext[0] != '.')
+ path.push_back('.');
+
+ // Append extension.
+ path.append(ext.begin(), ext.end());
+}
+
+void native(const Twine &path, SmallVectorImpl<char> &result) {
+ // Clear result.
+ result.clear();
+#ifdef LLVM_ON_WIN32
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+ result.reserve(p.size());
+ for (StringRef::const_iterator i = p.begin(),
+ e = p.end();
+ i != e;
+ ++i) {
+ if (*i == '/')
+ result.push_back('\\');
+ else
+ result.push_back(*i);
+ }
+#else
+ path.toVector(result);
+#endif
+}
+
+const StringRef filename(StringRef path) {
+ return *(--end(path));
+}
+
+const StringRef stem(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return fname;
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return fname;
+ else
+ return fname.substr(0, pos);
+}
+
+const StringRef extension(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return StringRef();
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return StringRef();
+ else
+ return fname.substr(pos);
+}
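
Worked results for the three accessors above, using a name with two dots; only the last dot in the filename separates stem from extension.

    #include "llvm/Support/PathV2.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;
    using namespace llvm::sys;

    void nameParts() {
      StringRef P = "/tmp/archive.tar.gz";
      StringRef F = path::filename(P);    // "archive.tar.gz"
      StringRef S = path::stem(P);        // "archive.tar"
      StringRef E = path::extension(P);   // ".gz"
      (void)F; (void)S; (void)E;
    }
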
+
+bool is_separator(char value) {
+ switch(value) {
+#ifdef LLVM_ON_WIN32
+ case '\\': // fall through
+#endif
+ case '/': return true;
+ default: return false;
+ }
+}
+
+bool has_root_name(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_name(p).empty();
+}
+
+bool has_root_directory(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_directory(p).empty();
+}
+
+bool has_root_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_path(p).empty();
+}
+
+bool has_relative_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !relative_path(p).empty();
+}
+
+bool has_filename(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !filename(p).empty();
+}
+
+bool has_parent_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !parent_path(p).empty();
+}
+
+bool has_stem(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !stem(p).empty();
+}
+
+bool has_extension(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !extension(p).empty();
+}
+
+bool is_absolute(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ bool rootDir = has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = has_root_name(p);
+#else
+ rootName = true;
+#endif
+
+ return rootDir && rootName;
+}
+
+bool is_relative(const Twine &path) {
+ return !is_absolute(path);
+}
+
+} // end namespace path
+
+namespace fs {
+
+error_code make_absolute(SmallVectorImpl<char> &path) {
+ StringRef p(path.data(), path.size());
+
+ bool rootName = path::has_root_name(p),
+ rootDirectory = path::has_root_directory(p);
+
+ // Already absolute.
+ if (rootName && rootDirectory)
+ return success;
+
+ // All of the following conditions will need the current directory.
+ SmallString<128> current_dir;
+ if (error_code ec = current_path(current_dir)) return ec;
+
+ // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) {
+ // Append path to the current directory.
+ path::append(current_dir, p);
+ // Set path to the result.
+ path.swap(current_dir);
+ return success;
+ }
+
+ if (!rootName && rootDirectory) {
+ StringRef cdrn = path::root_name(current_dir);
+ SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+ path::append(curDirRootName, p);
+ // Set path to the result.
+ path.swap(curDirRootName);
+ return success;
+ }
+
+ if (rootName && !rootDirectory) {
+ StringRef pRootName = path::root_name(p);
+ StringRef bRootDirectory = path::root_directory(current_dir);
+ StringRef bRelativePath = path::relative_path(current_dir);
+ StringRef pRelativePath = path::relative_path(p);
+
+ SmallString<128> res;
+ path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+ path.swap(res);
+ return success;
+ }
+
+ llvm_unreachable("All rootName and rootDirectory combinations should have "
+ "occurred above!");
+}
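
A small usage sketch of make_absolute; the declaration is assumed to live in llvm/Support/FileSystem.h, as the include list at the top of this file suggests, and the path literal is illustrative.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;

    bool absolutize(SmallString<128> &P) {
      // "obj/foo.o" becomes "<current directory>/obj/foo.o"; a path that is
      // already absolute is left untouched.
      if (error_code ec = sys::fs::make_absolute(P))
        return false;
      return true;
    }
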
+
+error_code create_directories(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ StringRef parent = path::parent_path(p);
+ bool parent_exists;
+
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
+
+ if (!parent_exists)
+ return create_directories(parent, existed);
+
+ return create_directory(p, existed);
+}
+
+bool exists(file_status status) {
+ return status_known(status) && status.type() != file_type::file_not_found;
+}
+
+bool status_known(file_status s) {
+ return s.type() != file_type::status_error;
+}
+
+bool is_directory(file_status status) {
+ return status.type() == file_type::directory_file;
+}
+
+error_code is_directory(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_directory(st);
+ return success;
+}
+
+bool is_regular_file(file_status status) {
+ return status.type() == file_type::regular_file;
+}
+
+error_code is_regular_file(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_regular_file(st);
+ return success;
+}
+
+bool is_symlink(file_status status) {
+ return status.type() == file_type::symlink_file;
+}
+
+error_code is_symlink(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_symlink(st);
+ return success;
+}
+
+bool is_other(file_status status) {
+ return exists(status) &&
+ !is_regular_file(status) &&
+ !is_directory(status) &&
+ !is_symlink(status);
+}
+
+void directory_entry::replace_filename(const Twine &filename, file_status st,
+ file_status symlink_st) {
+ SmallString<128> path(Path.begin(), Path.end());
+ path::remove_filename(path);
+ path::append(path, filename);
+ Path = path.str();
+ Status = st;
+ SymlinkStatus = symlink_st;
+}
+
+error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
+ SmallString<32> MagicStorage;
+ StringRef Magic = magic.toStringRef(MagicStorage);
+ SmallString<32> Buffer;
+
+ if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
+ if (ec == errc::value_too_large) {
+ // Magic.size() > file_size(Path).
+ result = false;
+ return success;
+ }
+ return ec;
+ }
+
+ result = Magic == Buffer;
+ return success;
+}
+
+error_code identify_magic(const Twine &path, LLVMFileType &result) {
+ SmallString<32> Magic;
+ error_code ec = get_magic(path, Magic.capacity(), Magic);
+ if (ec && ec != errc::value_too_large)
+ return ec;
+
+ result = IdentifyFileType(Magic.data(), Magic.size());
+ return success;
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+ if (ft == file_type::directory_file) {
+ // This code would be a lot better with exceptions ;/.
+ error_code ec;
+ for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ if (ec) return ec;
+ file_status st;
+ if (error_code ec = i->status(st)) return ec;
+ if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+ }
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count; // Include the directory itself in the items removed.
+ } else {
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count;
+ }
+
+ return success;
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ file_status fs;
+ if (error_code ec = status(path, fs))
+ return ec;
+ num_removed = 0;
+ return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+ return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/PluginLoader.cpp b/contrib/llvm/lib/Support/PluginLoader.cpp
new file mode 100644
index 000000000000..2924cfa38897
--- /dev/null
+++ b/contrib/llvm/lib/Support/PluginLoader.cpp
@@ -0,0 +1,47 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load <plugin> command line option handler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+static ManagedStatic<sys::SmartMutex<true> > PluginsLock;
+
+void PluginLoader::operator=(const std::string &Filename) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ std::string Error;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+ errs() << "Error opening '" << Filename << "': " << Error
+ << "\n -load request ignored.\n";
+ } else {
+ Plugins->push_back(Filename);
+ }
+}
+
+unsigned PluginLoader::getNumPlugins() {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+ sys::SmartScopedLock<true> Lock(*PluginsLock);
+ assert(Plugins.isConstructed() && num < Plugins->size() &&
+ "Asking for an out of bounds plugin");
+ return (*Plugins)[num];
+}
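
A hedged sketch of how this handler is normally driven: PluginLoader.h, whose option declaration is suppressed above via DONT_GET_PLUGIN_LOADER_OPTION, is expected to declare a -load command line option whose parser assigns each filename to a PluginLoader, invoking operator= above. A tool can then enumerate what was loaded:

    #include "llvm/Support/PluginLoader.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void listLoadedPlugins() {
      for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i)
        errs() << "loaded plugin: " << PluginLoader::getPlugin(i) << "\n";
    }
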
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 000000000000..082b7012eb23
--- /dev/null
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,133 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pretty stack trace facility: PrettyStackTraceEntry
+// objects register themselves on construction and their descriptions are
+// printed, oldest first, if a fatal signal is delivered while the program runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
+#include "llvm/ADT/SmallString.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
+using namespace llvm;
+
+namespace llvm {
+ bool DisablePrettyStackTrace = false;
+}
+
+// FIXME: This should be thread local when llvm supports threads.
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+ unsigned NextID = 0;
+ if (Entry->getNextEntry())
+ NextID = PrintStack(Entry->getNextEntry(), OS);
+ OS << NextID << ".\t";
+ Entry->print(OS);
+
+ return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+ // Don't print an empty trace.
+ if (PrettyStackTraceHead.get() == 0) return;
+
+ // If there are pretty stack frames registered, walk and emit them.
+ OS << "Stack dump:\n";
+
+ PrintStack(PrettyStackTraceHead.get(), OS);
+ OS.flush();
+}
+
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
+// If any clients of llvm try to link to libCrashReporterClient.a themselves,
+// only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+ = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process; it prints the pretty stack trace.
+static void CrashHandler(void *) {
+#ifndef __APPLE__
+ // On non-apple systems, just emit the crash stack trace to stderr.
+ PrintCurStackTrace(errs());
+#else
+ // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+ // put it into __crashreporter_info__.
+ SmallString<2048> TmpStr;
+ {
+ raw_svector_ostream Stream(TmpStr);
+ PrintCurStackTrace(Stream);
+ }
+
+ if (!TmpStr.empty()) {
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+ // Cast to void to avoid warning.
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#elif HAVE_CRASHREPORTER_INFO
+ __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+#endif
+ errs() << TmpStr.str();
+ }
+
+#endif
+}
+
+static bool RegisterCrashPrinter() {
+ if (!DisablePrettyStackTrace)
+ sys::AddSignalHandler(CrashHandler, 0);
+ return false;
+}
+
+PrettyStackTraceEntry::PrettyStackTraceEntry() {
+ // The first time this is called, we register the crash printer.
+ static bool HandlerRegistered = RegisterCrashPrinter();
+ (void)HandlerRegistered;
+
+ // Link ourselves.
+ NextEntry = PrettyStackTraceHead.get();
+ PrettyStackTraceHead.set(this);
+}
+
+PrettyStackTraceEntry::~PrettyStackTraceEntry() {
+ assert(PrettyStackTraceHead.get() == this &&
+ "Pretty stack trace entry destruction is out of order");
+ PrettyStackTraceHead.set(getNextEntry());
+}
+
+void PrettyStackTraceString::print(raw_ostream &OS) const {
+ OS << Str << "\n";
+}
+
+void PrettyStackTraceProgram::print(raw_ostream &OS) const {
+ OS << "Program arguments: ";
+ // Print the argument list.
+ for (unsigned i = 0, e = ArgC; i != e; ++i)
+ OS << ArgV[i] << ' ';
+ OS << '\n';
+}
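
A sketch of a typical client, assuming the constructors declared in llvm/Support/PrettyStackTrace.h take the program arguments and a plain C string respectively; entries are printed oldest first if the process later crashes.

    #include "llvm/Support/PrettyStackTrace.h"

    int main(int argc, char **argv) {
      // Printed as "Program arguments: ..." in the stack dump on a fatal signal.
      llvm::PrettyStackTraceProgram X(argc, argv);

      // Nested context; appears after the program-arguments entry in the dump.
      llvm::PrettyStackTraceString Phase("running the example phase");
      return 0;
    }
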
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
new file mode 100644
index 000000000000..88ca7c3f220f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -0,0 +1,33 @@
+//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Process concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Process.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Process.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Process.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Program.cpp b/contrib/llvm/lib/Support/Program.cpp
new file mode 100644
index 000000000000..01860b082d62
--- /dev/null
+++ b/contrib/llvm/lib/Support/Program.cpp
@@ -0,0 +1,56 @@
+//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Program concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+ return prg.Wait(path, secondsToWait, ErrMsg);
+ else
+ return -1;
+}
+
+void
+Program::ExecuteNoWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ Program prg;
+ prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
+}
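
A sketch of the blocking entry point above, assuming the usual static declaration in Program.h and a sys::Path constructible from a string; the binary path is hypothetical and the argument vector must be null terminated.

    #include "llvm/Support/Program.h"
    #include "llvm/Support/Path.h"
    #include <string>
    using namespace llvm;

    int runEcho() {
      sys::Path Echo("/bin/echo");                  // hypothetical binary path
      const char *Args[] = { "echo", "hello", 0 };  // argv[0] plus terminator
      std::string Err;
      return sys::Program::ExecuteAndWait(Echo, Args, /*envp*/ 0, /*redirects*/ 0,
                                          /*secondsToWait*/ 0, /*memoryLimit*/ 0,
                                          &Err);
    }
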
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Program.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Program.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp
new file mode 100644
index 000000000000..fc02f9cf7c11
--- /dev/null
+++ b/contrib/llvm/lib/Support/RWMutex.cpp
@@ -0,0 +1,157 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_rwlock_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+ // Initialize the rwlock
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = rwlock;
+ }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
+ }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/RWMutex.cpp
+#endif
+#endif
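
The class implemented above can be exercised directly; real clients normally go through the sys::RWMutex typedef and the scoped reader/writer guards in RWMutex.h, but this sketch sticks to the four methods defined here and assumes RWMutexImpl is constructible as declared in that header.

    #include "llvm/Support/RWMutex.h"
    using namespace llvm;

    static sys::RWMutexImpl TableLock;
    static int Table[16];

    int readSlot(unsigned i) {
      TableLock.reader_acquire();      // shared: many readers may hold this
      int v = Table[i & 15];
      TableLock.reader_release();
      return v;
    }

    void writeSlot(unsigned i, int v) {
      TableLock.writer_acquire();      // exclusive
      Table[i & 15] = v;
      TableLock.writer_release();
    }
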
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
new file mode 100644
index 000000000000..d293da07d684
--- /dev/null
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -0,0 +1,168 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "regex_impl.h"
+#include <string>
+using namespace llvm;
+
+Regex::Regex(StringRef regex, unsigned Flags) {
+ unsigned flags = 0;
+ preg = new llvm_regex();
+ preg->re_endp = regex.end();
+ if (Flags & IgnoreCase)
+ flags |= REG_ICASE;
+ if (Flags & Newline)
+ flags |= REG_NEWLINE;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+}
+
+Regex::~Regex() {
+ llvm_regfree(preg);
+ delete preg;
+}
+
+bool Regex::isValid(std::string &Error) {
+ if (!error)
+ return true;
+
+ size_t len = llvm_regerror(error, preg, NULL, 0);
+
+ Error.resize(len);
+ llvm_regerror(error, preg, &Error[0], len);
+ return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized
+/// matches it contains.
+unsigned Regex::getNumMatches() const {
+ return preg->re_nsub;
+}
+
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
+ unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+
+ // pmatch needs to have at least one element.
+ SmallVector<llvm_regmatch_t, 8> pm;
+ pm.resize(nmatch > 0 ? nmatch : 1);
+ pm[0].rm_so = 0;
+ pm[0].rm_eo = String.size();
+
+ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+
+ if (rc == REG_NOMATCH)
+ return false;
+ if (rc != 0) {
+ // regexec can fail due to invalid pattern or running out of memory.
+ error = rc;
+ return false;
+ }
+
+ // There was a match.
+
+ if (Matches) { // match position requested
+ Matches->clear();
+
+ for (unsigned i = 0; i != nmatch; ++i) {
+ if (pm[i].rm_so == -1) {
+ // this group didn't match
+ Matches->push_back(StringRef());
+ continue;
+ }
+ assert(pm[i].rm_eo >= pm[i].rm_so);
+ Matches->push_back(StringRef(String.data()+pm[i].rm_so,
+ pm[i].rm_eo-pm[i].rm_so));
+ }
+ }
+
+ return true;
+}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for termination and a trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
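
A short sketch of match() and sub() as implemented above; group 0 is the whole match, and \1..\9 in the replacement string refer to the parenthesized groups.

    #include "llvm/Support/Regex.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>
    using namespace llvm;

    void demoRegex() {
      Regex R("([a-z]+)-([0-9]+)");

      SmallVector<StringRef, 4> Groups;
      if (R.match("rev-42", &Groups)) {
        // Groups[0] == "rev-42", Groups[1] == "rev", Groups[2] == "42"
      }

      std::string Err;
      std::string Out = R.sub("\\2:\\1", "rev-42", &Err);  // Out == "42:rev"
      (void)Out;
    }
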
diff --git a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 000000000000..d63830185c32
--- /dev/null
+++ b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,73 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file pulls the addresses of certain symbols out of the linker. It must
+// include as few header files as possible because it declares the symbols as
+// void*, which would conflict with the actual symbol type if any header
+// declared it.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* symbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+ extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+ // If this is darwin, it has some funky issues, try to solve them here. Some
+ // important symbols are marked 'private external' which doesn't allow
+ // SearchForAddressOfSymbol to find them. As such, we special case them here,
+ // there is only a small handful of them.
+
+#ifdef __APPLE__
+ {
+ EXPLICIT_SYMBOL(__ashldi3);
+ EXPLICIT_SYMBOL(__ashrdi3);
+ EXPLICIT_SYMBOL(__cmpdi2);
+ EXPLICIT_SYMBOL(__divdi3);
+ EXPLICIT_SYMBOL(__fixdfdi);
+ EXPLICIT_SYMBOL(__fixsfdi);
+ EXPLICIT_SYMBOL(__fixunsdfdi);
+ EXPLICIT_SYMBOL(__fixunssfdi);
+ EXPLICIT_SYMBOL(__floatdidf);
+ EXPLICIT_SYMBOL(__floatdisf);
+ EXPLICIT_SYMBOL(__lshrdi3);
+ EXPLICIT_SYMBOL(__moddi3);
+ EXPLICIT_SYMBOL(__udivdi3);
+ EXPLICIT_SYMBOL(__umoddi3);
+
+ // __eprintf is sometimes used for assert() handling on x86.
+ //
+ // FIXME: Currently disabled when using Clang, as we don't always have our
+ // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+ EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+ }
+#endif
+
+#ifdef __CYGWIN__
+ {
+ EXPLICIT_SYMBOL(_alloca);
+ EXPLICIT_SYMBOL(__main);
+ }
+#endif
+
+#undef EXPLICIT_SYMBOL
+ return 0;
+}
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
+ return DoSearch(symbolName);
+}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
new file mode 100644
index 000000000000..a11789372d93
--- /dev/null
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Signals.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp
new file mode 100644
index 000000000000..997ce0b74cd2
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp
@@ -0,0 +1,229 @@
+//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
+// overview of the algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+void SmallPtrSetImpl::shrink_and_clear() {
+ assert(!isSmall() && "Can't shrink a small set!");
+ free(CurArray);
+
+ // Reduce the number of buckets.
+ CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32;
+ NumElements = NumTombstones = 0;
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ memset(CurArray, -1, CurArraySize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[CurArraySize] = 0;
+}
+
+bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is already in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr)
+ return false;
+
+ // Nope, there isn't. If we stay small, just 'pushback' now.
+ if (NumElements < CurArraySize-1) {
+ SmallArray[NumElements++] = Ptr;
+ return true;
+ }
+ // Otherwise, hit the big set case, which will call grow.
+ }
+
+ if (NumElements*4 >= CurArraySize*3) {
+ // If more than 3/4 of the array is full, grow.
+ Grow(CurArraySize < 64 ? 128 : CurArraySize*2);
+ } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) {
+ // If fewer than 1/8 of the buckets are empty (the rest being elements or
+ // tombstones), rehash at the same size to flush out the tombstones.
+ Grow(CurArraySize);
+ }
+
+ // Okay, we know we have space. Find a hash bucket.
+ const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
+ if (*Bucket == Ptr) return false; // Already inserted, good.
+
+ // Otherwise, insert it!
+ if (*Bucket == getTombstoneMarker())
+ --NumTombstones;
+ *Bucket = Ptr;
+ ++NumElements; // Track density.
+ return true;
+}
+
+bool SmallPtrSetImpl::erase_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr) {
+ // If it is in the set, replace this element.
+ *APtr = E[-1];
+ E[-1] = getEmptyMarker();
+ --NumElements;
+ return true;
+ }
+
+ return false;
+ }
+
+ // It is not in the small representation; find its bucket in the hash table.
+ void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
+ if (*Bucket != Ptr) return false; // Not in the set?
+
+ // Set this as a tombstone.
+ *Bucket = getTombstoneMarker();
+ --NumElements;
+ ++NumTombstones;
+ return true;
+}
+
+const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
+ unsigned Bucket = Hash(Ptr);
+ unsigned ArraySize = CurArraySize;
+ unsigned ProbeAmt = 1;
+ const void *const *Array = CurArray;
+ const void *const *Tombstone = 0;
+ while (1) {
+ // Found Ptr's bucket?
+ if (Array[Bucket] == Ptr)
+ return Array+Bucket;
+
+ // If we found an empty bucket, the pointer doesn't exist in the set.
+ // Return a tombstone if we've seen one so far, or the empty bucket if
+ // not.
+ if (Array[Bucket] == getEmptyMarker())
+ return Tombstone ? Tombstone : Array+Bucket;
+
+ // If this is a tombstone, remember it. If Ptr ends up not in the set, we
+ // prefer to return it rather than something that would require more probing.
+ if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+ Tombstone = Array+Bucket; // Remember the first tombstone found.
+
+ // It's a hash collision or a tombstone. Reprobe.
+ Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+ }
+}
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow(unsigned NewSize) {
+ // Allocate twice as many buckets, but at least 128.
+ unsigned OldSize = CurArraySize;
+
+ const void **OldBuckets = CurArray;
+ bool WasSmall = isSmall();
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ CurArraySize = NewSize;
+ memset(CurArray, -1, NewSize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[NewSize] = 0;
+
+ // Copy over all the elements.
+ if (WasSmall) {
+ // Small sets store their elements in order.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+ BucketPtr != E; ++BucketPtr) {
+ const void *Elt = *BucketPtr;
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+ } else {
+ // Copy over all valid entries.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+ BucketPtr != E; ++BucketPtr) {
+ // Copy over the element if it is valid.
+ const void *Elt = *BucketPtr;
+ if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+
+ free(OldBuckets);
+ NumTombstones = 0;
+ }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+ const SmallPtrSetImpl& that) {
+ SmallArray = SmallStorage;
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (that.isSmall()) {
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else {
+ CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = that.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = that.NumElements;
+ NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+ if (isSmall() && RHS.isSmall())
+ assert(CurArraySize == RHS.CurArraySize &&
+ "Cannot assign sets with different small sizes");
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (RHS.isSmall()) {
+ if (!isSmall())
+ free(CurArray);
+ CurArray = SmallArray;
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else if (CurArraySize != RHS.CurArraySize) {
+ if (isSmall())
+ CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+ else
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = RHS.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = RHS.NumElements;
+ NumTombstones = RHS.NumTombstones;
+}
+
+SmallPtrSetImpl::~SmallPtrSetImpl() {
+ if (!isSmall())
+ free(CurArray);
+}
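
Typical use of the container built on this implementation: small sets of pointers stay in the inline array and spill into the probed hash table above only once they fill up. Names are illustrative.

    #include "llvm/ADT/SmallPtrSet.h"

    void rememberSeen(int *A, int *B) {
      llvm::SmallPtrSet<int *, 8> Seen;
      Seen.insert(A);        // newly inserted
      Seen.insert(A);        // duplicate: the set is unchanged
      if (Seen.count(B))     // membership test
        Seen.erase(B);
    }
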
diff --git a/contrib/llvm/lib/Support/SmallVector.cpp b/contrib/llvm/lib/Support/SmallVector.cpp
new file mode 100644
index 000000000000..a89f14957635
--- /dev/null
+++ b/contrib/llvm/lib/Support/SmallVector.cpp
@@ -0,0 +1,40 @@
+//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+/// grow_pod - This is an implementation of the grow() method which only works
+/// on POD-like datatypes and is out of line to reduce code duplication.
+void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
+ size_t CurSizeBytes = size_in_bytes();
+ size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
+ if (NewCapacityInBytes < MinSizeInBytes)
+ NewCapacityInBytes = MinSizeInBytes;
+
+ void *NewElts;
+ if (this->isSmall()) {
+ NewElts = malloc(NewCapacityInBytes);
+
+ // Copy the elements over. No need to run dtors on PODs.
+ memcpy(NewElts, this->BeginX, CurSizeBytes);
+ } else {
+ // If this wasn't grown from the inline copy, grow the allocated space.
+ NewElts = realloc(this->BeginX, NewCapacityInBytes);
+ }
+
+ this->EndX = (char*)NewElts+CurSizeBytes;
+ this->BeginX = NewElts;
+ this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
+}
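+
+// Illustrative usage (a sketch, not part of this file's interface): for POD
+// element types such as int, SmallVector spills from its inline buffer to a
+// single heap allocation via grow_pod once the inline capacity is exceeded:
+//   llvm::SmallVector<int, 4> V;
+//   for (int i = 0; i != 64; ++i)
+//     V.push_back(i);   // moves to the heap when the inline space runs out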
+
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
new file mode 100644
index 000000000000..de042a9f53c8
--- /dev/null
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -0,0 +1,232 @@
+//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+namespace {
+ struct LineNoCacheTy {
+ int LastQueryBufferID;
+ const char *LastQuery;
+ unsigned LineNoOfQuery;
+ };
+}
+
+static LineNoCacheTy *getCache(void *Ptr) {
+ return (LineNoCacheTy*)Ptr;
+}
+
+
+SourceMgr::~SourceMgr() {
+ // Delete the line # cache if allocated.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ delete Cache;
+
+ while (!Buffers.empty()) {
+ delete Buffers.back().Buffer;
+ Buffers.pop_back();
+ }
+}
+
+/// AddIncludeFile - Search for a file with the specified name in the current
+/// directory or in one of the IncludeDirs. If no file is found, this returns
+/// ~0, otherwise it returns the buffer ID of the stacked file.
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc,
+ std::string &IncludedFile) {
+ OwningPtr<MemoryBuffer> NewBuf;
+ IncludedFile = Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ IncludedFile = IncludeDirectories[i] + "/" + Filename;
+ MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+ }
+
+ if (NewBuf == 0) return ~0U;
+
+ return AddNewSourceBuffer(NewBuf.take(), IncludeLoc);
+}
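+
+// Illustrative usage (variable names are examples only): with
+// IncludeDirectories == {"include"}, a request for "foo.inc" is tried as
+// "foo.inc" and then as "include/foo.inc"; IncludedFile reports the path
+// that was actually opened.
+//   std::string Resolved;
+//   unsigned BufID = SrcMgr.AddIncludeFile("foo.inc", Loc, Resolved);
+//   if (BufID == ~0U) { /* not found in "." or any include directory */ }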
+
+
+/// FindBufferContainingLoc - Return the ID of the buffer containing the
+/// specified location, returning -1 if not found.
+int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
+ for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
+ if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
+ // Use <= here so that a pointer to the null at the end of the buffer
+ // is included as part of the buffer.
+ Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
+ return i;
+ return -1;
+}
+
+/// FindLineNumber - Find the line number for the specified location in the
+/// specified file. This is not a fast method.
+unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
+ if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID != -1 && "Invalid Location!");
+
+ MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer;
+
+ // Count the number of \n's between the start of the file and the specified
+ // location.
+ unsigned LineNo = 1;
+
+ const char *Ptr = Buff->getBufferStart();
+
+ // If we have a line number cache, and if the query is to a later point in the
+ // same file, start searching from the last query location. This optimizes
+ // for the case when multiple diagnostics come out of one file in order.
+ if (LineNoCacheTy *Cache = getCache(LineNoCache))
+ if (Cache->LastQueryBufferID == BufferID &&
+ Cache->LastQuery <= Loc.getPointer()) {
+ Ptr = Cache->LastQuery;
+ LineNo = Cache->LineNoOfQuery;
+ }
+
+ // Scan for the location being queried, keeping track of the number of lines
+ // we see.
+ for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
+ if (*Ptr == '\n') ++LineNo;
+
+
+ // Allocate the line number cache if it doesn't exist.
+ if (LineNoCache == 0)
+ LineNoCache = new LineNoCacheTy();
+
+ // Update the line # cache.
+ LineNoCacheTy &Cache = *getCache(LineNoCache);
+ Cache.LastQueryBufferID = BufferID;
+ Cache.LastQuery = Ptr;
+ Cache.LineNoOfQuery = LineNo;
+ return LineNo;
+}
+
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
+ if (IncludeLoc == SMLoc()) return; // Top of stack.
+
+ int CurBuf = FindBufferContainingLoc(IncludeLoc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+ OS << "Included from "
+ << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+}
+
+
+/// GetMessage - Return an SMDiagnostic at the specified location with the
+/// specified string.
+///
+/// @param Type - If non-null, the kind of message (e.g., "error") which is
+/// prefixed to the message.
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
+ const char *Type, bool ShowLine) const {
+
+ // First thing to do: find the current buffer containing the specified
+ // location.
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+
+ // Scan backward to find the start of the line.
+ const char *LineStart = Loc.getPointer();
+ while (LineStart != CurMB->getBufferStart() &&
+ LineStart[-1] != '\n' && LineStart[-1] != '\r')
+ --LineStart;
+
+ std::string LineStr;
+ if (ShowLine) {
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ LineStr = std::string(LineStart, LineEnd);
+ }
+
+ std::string PrintedMsg;
+ raw_string_ostream OS(PrintedMsg);
+ if (Type)
+ OS << Type << ": ";
+ OS << Msg;
+
+ return SMDiagnostic(*this, Loc,
+ CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
+ Loc.getPointer()-LineStart, OS.str(),
+ LineStr, ShowLine);
+}
+
+void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
+ const char *Type, bool ShowLine) const {
+ // Report the message with the diagnostic handler if present.
+ if (DiagHandler) {
+ DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
+ return;
+ }
+
+ raw_ostream &OS = errs();
+
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+ GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
+}
+
+//===----------------------------------------------------------------------===//
+// SMDiagnostic Implementation
+//===----------------------------------------------------------------------===//
+
+void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
+ if (ProgName && ProgName[0])
+ S << ProgName << ": ";
+
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
+
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo+1);
+ }
+ S << ": ";
+ }
+
+ S << Message << '\n';
+
+ if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
+ S << LineContents << '\n';
+
+ // Print out spaces/tabs before the caret.
+ for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
+ S << (LineContents[i] == '\t' ? '\t' : ' ');
+ S << "^\n";
+ }
+}
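+
+// Illustrative output: for a diagnostic of Type "error" at line 3, column 5
+// of foo.td with ShowLine enabled, Print("prog", S) emits something like:
+//   prog: foo.td:3:5: error: expected identifier
+//   def Foo;
+//       ^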
+
+
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
new file mode 100644
index 000000000000..1e733d92e610
--- /dev/null
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -0,0 +1,152 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes. These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled("stats", cl::desc("Enable statistics output from program"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called. We print statistics from the destructor.
+class StatisticInfo {
+ std::vector<const Statistic*> Stats;
+ friend void llvm::PrintStatistics();
+ friend void llvm::PrintStatistics(raw_ostream &OS);
+public:
+ ~StatisticInfo();
+
+ void addStatistic(const Statistic *S) {
+ Stats.push_back(S);
+ }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+static ManagedStatic<sys::SmartMutex<true> > StatLock;
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+ // If stats are enabled, inform StatInfo that this statistic should be
+ // printed.
+ sys::SmartScopedLock<true> Writer(*StatLock);
+ if (!Initialized) {
+ if (Enabled)
+ StatInfo->addStatistic(this);
+
+ sys::MemoryFence();
+ // Remember we have been registered.
+ Initialized = true;
+ }
+}
+
+namespace {
+
+struct NameCompare {
+ bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+ int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+ if (Cmp != 0) return Cmp < 0;
+
+ // Secondary key is the description.
+ return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+ }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+ llvm::PrintStatistics();
+}
+
+void llvm::EnableStatistics() {
+ Enabled.setValue(true);
+}
+
+bool llvm::AreStatisticsEnabled() {
+ return Enabled;
+}
+
+void llvm::PrintStatistics(raw_ostream &OS) {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Figure out how long the biggest Value and Name fields are.
+ unsigned MaxNameLen = 0, MaxValLen = 0;
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ MaxValLen = std::max(MaxValLen,
+ (unsigned)utostr(Stats.Stats[i]->getValue()).size());
+ MaxNameLen = std::max(MaxNameLen,
+ (unsigned)std::strlen(Stats.Stats[i]->getName()));
+ }
+
+ // Sort the fields by name.
+ std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
+
+ // Print out the statistics header...
+ OS << "===" << std::string(73, '-') << "===\n"
+ << " ... Statistics Collected ...\n"
+ << "===" << std::string(73, '-') << "===\n\n";
+
+ // Print all of the statistics.
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ std::string CountStr = utostr(Stats.Stats[i]->getValue());
+ OS << std::string(MaxValLen-CountStr.size(), ' ')
+ << CountStr << " " << Stats.Stats[i]->getName()
+ << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
+ << " - " << Stats.Stats[i]->getDesc() << "\n";
+ }
+
+  OS << '\n';
+  OS.flush();   // Flush the output stream.
+
+}
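+
+// Illustrative sample of the resulting -stats report (numbers and the
+// statistic itself are invented): each line is a right-aligned count, the
+// statistic name, and its description, e.g.
+//   42 gcse - Number of instructions killed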
+
+void llvm::PrintStatistics() {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Statistics not enabled?
+ if (Stats.Stats.empty()) return;
+
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ PrintStatistics(OutStream);
+ delete &OutStream; // Close the file.
+}
diff --git a/contrib/llvm/lib/Support/StringExtras.cpp b/contrib/llvm/lib/Support/StringExtras.cpp
new file mode 100644
index 000000000000..eb2fa084218a
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringExtras.cpp
@@ -0,0 +1,81 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's2' in string 's1', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
+ size_t N = s2.size(), M = s1.size();
+ if (N > M)
+ return StringRef::npos;
+ for (size_t i = 0, e = M - N + 1; i != e; ++i)
+ if (s1.substr(i, N).equals_lower(s2))
+ return i;
+ return StringRef::npos;
+}
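+
+// e.g. (illustrative) StrInStrNoCase("Hello World", "WORLD") == 6, and the
+// result is StringRef::npos when the needle does not occur at all.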
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+ StringRef Delimiters) {
+ // Figure out where the token starts.
+ StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+
+ // Find the next occurrence of the delimiter.
+ StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+
+ return std::make_pair(Source.slice(Start, End), Source.substr(End));
+}
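+
+// e.g. (illustrative) getToken("  foo bar", " ") yields the pair
+// ("foo", " bar"); calling it again on the tail yields ("bar", "").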
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void llvm::SplitString(StringRef Source,
+ SmallVectorImpl<StringRef> &OutFragments,
+ StringRef Delimiters) {
+ StringRef S2, S;
+ tie(S2, S) = getToken(Source, Delimiters);
+ while (!S2.empty()) {
+ OutFragments.push_back(S2);
+ tie(S2, S) = getToken(S, Delimiters);
+ }
+}
+
+void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
+
+ if (p.first.size() != 0 || KeepEmpty)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
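+
+// e.g. (illustrative) StringRef("a,b,,c").split(V, ",", -1, true) appends
+// "a", "b", "", "c" to V; with KeepEmpty == false the empty piece is dropped.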
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
new file mode 100644
index 000000000000..a1ac512fa244
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -0,0 +1,230 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+ ItemSize = itemSize;
+
+ // If a size is specified, initialize the table with that many buckets.
+ if (InitSize) {
+ init(InitSize);
+ return;
+ }
+
+ // Otherwise, initialize it with zero buckets to avoid the allocation.
+ TheTable = 0;
+ NumBuckets = 0;
+ NumItems = 0;
+ NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+ assert((InitSize & (InitSize-1)) == 0 &&
+ "Init Size must be a power of 2 or zero!");
+ NumBuckets = InitSize ? InitSize : 16;
+ NumItems = 0;
+ NumTombstones = 0;
+
+ TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
+
+ // Allocate one extra bucket, set it to look filled so the iterators stop at
+ // end.
+ TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+}
+
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in. If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null. Otherwise, it will be null. In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) { // Hash table unallocated so far?
+ init(16);
+ HTSize = NumBuckets;
+ }
+ unsigned FullHashValue = HashString(Name);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ int FirstTombstone = -1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return it.
+ if (BucketItem == 0) {
+ // If we found a tombstone, we want to reuse the tombstone instead of an
+ // empty bucket. This reduces probing.
+ if (FirstTombstone != -1) {
+ TheTable[FirstTombstone].FullHashValue = FullHashValue;
+ return FirstTombstone;
+ }
+
+ Bucket.FullHashValue = FullHashValue;
+ return BucketNo;
+ }
+
+ if (BucketItem == getTombstoneVal()) {
+ // Skip over tombstones. However, remember the first one we see.
+ if (FirstTombstone == -1) FirstTombstone = BucketNo;
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because Name isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+
+/// FindKey - Look up the bucket that contains the specified key. If it exists
+/// in the map, return the bucket number of the key. Otherwise return -1.
+/// This does not modify the map.
+int StringMapImpl::FindKey(StringRef Key) const {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) return -1; // Really empty table?
+ unsigned FullHashValue = HashString(Key);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return.
+ if (BucketItem == 0)
+ return -1;
+
+ if (BucketItem == getTombstoneVal()) {
+ // Ignore tombstones.
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+      // Do the comparison like this because Key isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing, it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
+/// delete it. This aborts if the value isn't in the table.
+void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
+ const char *VStr = (char*)V + ItemSize;
+ StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
+ (void)V2;
+ assert(V == V2 && "Didn't find key?");
+}
+
+/// RemoveKey - Remove the StringMapEntry for the specified key from the
+/// table, returning it. If the key is not in the table, this returns null.
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
+ int Bucket = FindKey(Key);
+ if (Bucket == -1) return 0;
+
+ StringMapEntryBase *Result = TheTable[Bucket].Item;
+ TheTable[Bucket].Item = getTombstoneVal();
+ --NumItems;
+ ++NumTombstones;
+ assert(NumItems + NumTombstones <= NumBuckets);
+
+ return Result;
+}
+
+
+
+/// RehashTable - Grow the table, redistributing values into the buckets with
+/// the appropriate mod-of-hashtable-size.
+void StringMapImpl::RehashTable() {
+ unsigned NewSize;
+
+ // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow/rehash the table.
+ if (NumItems*4 > NumBuckets*3) {
+ NewSize = NumBuckets*2;
+ } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) {
+ NewSize = NumBuckets;
+ } else {
+ return;
+ }
+
+ // Allocate one extra bucket which will always be non-empty. This allows the
+ // iterators to stop at end.
+ ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
+ NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
+
+ // Rehash all the items into their new buckets. Luckily :) we already have
+ // the hash values available, so we don't have to rehash any strings.
+ for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
+ if (IB->Item && IB->Item != getTombstoneVal()) {
+ // Fast case, bucket available.
+ unsigned FullHash = IB->FullHashValue;
+ unsigned NewBucket = FullHash & (NewSize-1);
+ if (NewTableArray[NewBucket].Item == 0) {
+ NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
+ NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+ continue;
+ }
+
+ // Otherwise probe for a spot.
+ unsigned ProbeSize = 1;
+ do {
+ NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
+ } while (NewTableArray[NewBucket].Item);
+
+ // Finally found a slot. Fill it in.
+ NewTableArray[NewBucket].Item = IB->Item;
+ NewTableArray[NewBucket].FullHashValue = FullHash;
+ }
+ }
+
+ free(TheTable);
+
+ TheTable = NewTableArray;
+ NumBuckets = NewSize;
+ NumTombstones = 0;
+}
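+
+// For example (illustrative): a 16-bucket table grows to 32 buckets once it
+// holds more than 12 live items; a table merely clogged with tombstones is
+// rehashed at its current size, which resets NumTombstones to zero.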
diff --git a/contrib/llvm/lib/Support/StringPool.cpp b/contrib/llvm/lib/Support/StringPool.cpp
new file mode 100644
index 000000000000..ff607cf8c4ad
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringPool.cpp
@@ -0,0 +1,35 @@
+//===-- StringPool.cpp - Interned string pool -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringPool class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringPool.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+StringPool::StringPool() {}
+
+StringPool::~StringPool() {
+ assert(InternTable.empty() && "PooledStringPtr leaked!");
+}
+
+PooledStringPtr StringPool::intern(StringRef Key) {
+ table_t::iterator I = InternTable.find(Key);
+ if (I != InternTable.end())
+ return PooledStringPtr(&*I);
+
+ entry_t *S = entry_t::Create(Key.begin(), Key.end());
+ S->getValue().Pool = this;
+ InternTable.insert(S);
+
+ return PooledStringPtr(S);
+}
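+
+// Illustrative usage (the pool and key are examples only): interning the
+// same key twice hands back handles to one shared entry:
+//   PooledStringPtr A = Pool.intern("foo");
+//   PooledStringPtr B = Pool.intern("foo");   // A and B share one entry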
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
new file mode 100644
index 000000000000..8c3fc094cd11
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -0,0 +1,415 @@
+//===-- StringRef.cpp - Lightweight String References ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/OwningPtr.h"
+#include <bitset>
+
+using namespace llvm;
+
+// MSVC emits references to this into the translation units which reference it.
+#ifndef _MSC_VER
+const size_t StringRef::npos;
+#endif
+
+static char ascii_tolower(char x) {
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+ return x;
+}
+
+static bool ascii_isdigit(char x) {
+ return x >= '0' && x <= '9';
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ unsigned char LHC = ascii_tolower(Data[I]);
+ unsigned char RHC = ascii_tolower(RHS.Data[I]);
+ if (LHC != RHC)
+ return LHC < RHC ? -1 : 1;
+ }
+
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ if (Data[I] == RHS.Data[I])
+ continue;
+ if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+ // The longer sequence of numbers is larger. This doesn't really handle
+ // prefixed zeros well.
+ for (size_t J = I+1; J != E+1; ++J) {
+ bool ld = J < Length && ascii_isdigit(Data[J]);
+ bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+ if (ld != rd)
+ return rd ? -1 : 1;
+ if (!rd)
+ break;
+ }
+ }
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+ }
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
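+
+// e.g. (illustrative) StringRef("a7").compare_numeric("a12") < 0 because
+// 7 < 12, whereas plain compare() orders "a12" before "a7" ('1' < '7').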
+
+// Compute the edit distance between the two given strings.
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+ bool AllowReplacements,
+ unsigned MaxEditDistance) {
+ // The algorithm implemented below is the "classic"
+ // dynamic-programming algorithm for computing the Levenshtein
+ // distance, which is described here:
+ //
+ // http://en.wikipedia.org/wiki/Levenshtein_distance
+ //
+ // Although the algorithm is typically described using an m x n
+  // array, only two rows are used at a time, so this implementation
+ // just keeps two separate vectors for those two rows.
+ size_type m = size();
+ size_type n = Other.size();
+
+ const unsigned SmallBufferSize = 64;
+ unsigned SmallBuffer[SmallBufferSize];
+ llvm::OwningArrayPtr<unsigned> Allocated;
+ unsigned *previous = SmallBuffer;
+ if (2*(n + 1) > SmallBufferSize) {
+ previous = new unsigned [2*(n+1)];
+ Allocated.reset(previous);
+ }
+ unsigned *current = previous + (n + 1);
+
+ for (unsigned i = 0; i <= n; ++i)
+ previous[i] = i;
+
+ for (size_type y = 1; y <= m; ++y) {
+ current[0] = y;
+ unsigned BestThisRow = current[0];
+
+ for (size_type x = 1; x <= n; ++x) {
+ if (AllowReplacements) {
+ current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
+ min(current[x-1], previous[x])+1);
+ }
+ else {
+ if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
+ else current[x] = min(current[x-1], previous[x]) + 1;
+ }
+ BestThisRow = min(BestThisRow, current[x]);
+ }
+
+ if (MaxEditDistance && BestThisRow > MaxEditDistance)
+ return MaxEditDistance + 1;
+
+ unsigned *tmp = current;
+ current = previous;
+ previous = tmp;
+ }
+
+ unsigned Result = previous[n];
+ return Result;
+}
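+
+// e.g. (illustrative) StringRef("kitten").edit_distance("sitting") == 3 when
+// replacements are allowed: two substitutions plus one insertion.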
+
+//===----------------------------------------------------------------------===//
+// String Searching
+//===----------------------------------------------------------------------===//
+
+
+/// find - Search for the first string \arg Str in the string.
+///
+/// \return - The index of the first occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::find(StringRef Str, size_t From) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+}
+
+/// rfind - Search for the last string \arg Str in the string.
+///
+/// \return - The index of the last occurrence of \arg Str, or npos if not
+/// found.
+size_t StringRef::rfind(StringRef Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = Length - N + 1, e = 0; i != e;) {
+ --i;
+ if (substr(i, N).equals(Str))
+ return i;
+ }
+ return npos;
+}
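+
+// e.g. (illustrative) StringRef("abcabc").find("bc") == 1, while
+// StringRef("abcabc").rfind("bc") == 4.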
+
+/// find_first_of - Find the first character in the string that is in \arg
+/// Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// \arg C or npos if not found.
+StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string \arg Chars, or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ if (!CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
+
+/// find_last_of - Find the last character in the string that is in \arg Chars,
+/// or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
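+
+// e.g. (illustrative) for StringRef("Hello"), find_first_of("aeiou") == 1
+// (the 'e') and find_last_of("aeiou") == 4 (the 'o').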
+
+//===----------------------------------------------------------------------===//
+// Helpful Algorithms
+//===----------------------------------------------------------------------===//
+
+/// count - Return the number of non-overlapped occurrences of \arg Str in
+/// the string.
+size_t StringRef::count(StringRef Str) const {
+ size_t Count = 0;
+ size_t N = Str.size();
+ if (N > Length)
+ return 0;
+ for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ if (substr(i, N).equals(Str))
+ ++Count;
+ return Count;
+}
+
+static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ return 16;
+ } else if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ return 2;
+ } else if (Str.startswith("0")) {
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+
+/// GetAsUnsignedInteger - Workhorse method that converts an integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Parse all the bytes of the string given this radix. Watch for overflow.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ unsigned long long PrevResult = Result;
+ Result = Result*Radix+CharVal;
+
+ // Check for overflow.
+ if (Result < PrevResult)
+ return true;
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
+ return GetAsUnsignedInteger(*this, Radix, Result);
+}
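+
+// e.g. (illustrative) with unsigned long long V,
+// StringRef("0x1f").getAsInteger(0, V) autosenses radix 16, sets V to 31 and
+// returns false (success); StringRef("12ab").getAsInteger(10, V) returns
+// true (failure) at the first non-decimal character.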
+
+
+bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+ unsigned long long ULLVal;
+
+ // Handle positive strings first.
+ if (empty() || front() != '-') {
+ if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ // Check for value so large it overflows a signed value.
+ (long long)ULLVal < 0)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
+
+ // Get the positive part of the value.
+ if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+ // Reject values so large they'd overflow as negative signed, but allow
+ // "-0". This negates the unsigned so that the negative isn't undefined
+ // on signed overflow.
+ (long long)-ULLVal > 0)
+ return true;
+
+ Result = -ULLVal;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
+ long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (int)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
+ unsigned long long Val;
+ if (getAsInteger(Radix, Val) ||
+ (unsigned)Val != Val)
+ return true;
+ Result = Val;
+ return false;
+}
+
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ assert(Radix > 1 && Radix <= 36);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Skip leading zeroes. This can be a significant improvement if
+ // it means we don't need > 64 bits.
+ while (!Str.empty() && Str.front() == '0')
+ Str = Str.substr(1);
+
+ // If it was nothing but zeroes....
+ if (Str.empty()) {
+ Result = APInt(64, 0);
+ return false;
+ }
+
+ // (Over-)estimate the required number of bits.
+ unsigned Log2Radix = 0;
+ while ((1U << Log2Radix) < Radix) Log2Radix++;
+ bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
+
+ unsigned BitWidth = Log2Radix * Str.size();
+ if (BitWidth < Result.getBitWidth())
+ BitWidth = Result.getBitWidth(); // don't shrink the result
+ else
+ Result = Result.zext(BitWidth);
+
+ APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
+ if (!IsPowerOf2Radix) {
+ // These must have the same bit-width as Result.
+ RadixAP = APInt(BitWidth, Radix);
+ CharAP = APInt(BitWidth, 0);
+ }
+
+ // Parse all the bytes of the string given this radix.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ if (IsPowerOf2Radix) {
+ Result <<= Log2Radix;
+ Result |= CharVal;
+ } else {
+ Result *= RadixAP;
+ CharAP = CharVal;
+ Result += CharAP;
+ }
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp
new file mode 100644
index 000000000000..54b5e97bfe18
--- /dev/null
+++ b/contrib/llvm/lib/Support/SystemUtils.cpp
@@ -0,0 +1,55 @@
+//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
+ bool print_warning) {
+ if (stream_to_check.is_displayed()) {
+ if (print_warning) {
+ errs() << "WARNING: You're attempting to print out a bitcode file.\n"
+ "This is inadvisable as it may cause display problems. If\n"
+ "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ "can force output with the `-f' option.\n\n";
+ }
+ return true;
+ }
+ return false;
+}
+
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just manipulates the path and doesn't check for executability.
+/// @brief Find a named executable.
+sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName,
+ const char *Argv0, void *MainAddr) {
+ // Check the directory that the calling program is in. We can do
+ // this if ProgramPath contains at least one / character, indicating that it
+ // is a relative path to the executable itself.
+ sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
+ Result.eraseComponent();
+
+ if (!Result.isEmpty()) {
+ Result.appendComponent(ExeName);
+ Result.appendSuffix(sys::Path::GetEXESuffix());
+ }
+
+ return Result;
+}
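+
+// e.g. (illustrative) if the running tool is /usr/local/bin/clang, then
+// PrependMainExecutablePath("opt", Argv0, MainAddr) yields
+// /usr/local/bin/opt plus the platform executable suffix, if any.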
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
new file mode 100644
index 000000000000..293a5d7a0168
--- /dev/null
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -0,0 +1,92 @@
+//===--- TargetRegistry.cpp - Target registration -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include <cassert>
+using namespace llvm;
+
+// Clients are responsible for avoiding race conditions in registration.
+static Target *FirstTarget = 0;
+
+TargetRegistry::iterator TargetRegistry::begin() {
+ return iterator(FirstTarget);
+}
+
+const Target *TargetRegistry::lookupTarget(const std::string &TT,
+ std::string &Error) {
+ // Provide special warning when no targets are initialized.
+ if (begin() == end()) {
+ Error = "Unable to find target for this triple (no targets are registered)";
+ return 0;
+ }
+ const Target *Best = 0, *EquallyBest = 0;
+ unsigned BestQuality = 0;
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (unsigned Qual = it->TripleMatchQualityFn(TT)) {
+ if (!Best || Qual > BestQuality) {
+ Best = &*it;
+ EquallyBest = 0;
+ BestQuality = Qual;
+ } else if (Qual == BestQuality)
+ EquallyBest = &*it;
+ }
+ }
+
+ if (!Best) {
+ Error = "No available targets are compatible with this triple, "
+ "see -version for the available targets.";
+ return 0;
+ }
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ if (EquallyBest) {
+ Error = std::string("Cannot choose between targets \"") +
+ Best->Name + "\" and \"" + EquallyBest->Name + "\"";
+ return 0;
+ }
+
+ return Best;
+}
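+
+// Illustrative usage (assumes some targets have already been registered):
+//   std::string Err;
+//   const Target *T =
+//     TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Err);
+//   if (!T) errs() << Err << "\n";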
+
+void TargetRegistry::RegisterTarget(Target &T,
+ const char *Name,
+ const char *ShortDesc,
+ Target::TripleMatchQualityFnTy TQualityFn,
+ bool HasJIT) {
+ assert(Name && ShortDesc && TQualityFn &&
+ "Missing required target information!");
+
+ // Check if this target has already been initialized, we allow this as a
+ // convenience to some clients.
+ if (T.Name)
+ return;
+
+ // Add to the list of targets.
+ T.Next = FirstTarget;
+ FirstTarget = &T;
+
+ T.Name = Name;
+ T.ShortDesc = ShortDesc;
+ T.TripleMatchQualityFn = TQualityFn;
+ T.HasJIT = HasJIT;
+}
+
+const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
+ const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+
+ if (TheTarget && !TheTarget->hasJIT()) {
+ Error = "No JIT compatible target available for this host";
+ return 0;
+ }
+
+ return TheTarget;
+}
+
diff --git a/contrib/llvm/lib/Support/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp
new file mode 100644
index 000000000000..6b43048da155
--- /dev/null
+++ b/contrib/llvm/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,84 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+ pthread_key_t* key = new pthread_key_t;
+ int errorcode = pthread_key_create(key, NULL);
+ assert(errorcode == 0);
+ (void) errorcode;
+ data = (void*)key;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ int errorcode = pthread_key_delete(*key);
+ assert(errorcode == 0);
+ (void) errorcode;
+ delete key;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ int errorcode = pthread_setspecific(*key, d);
+ assert(errorcode == 0);
+ (void) errorcode;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ pthread_key_t* key = static_cast<pthread_key_t*>(data);
+ return pthread_getspecific(*key);
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/ThreadLocal.cpp
+#endif
+#endif
diff --git a/contrib/llvm/lib/Support/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp
new file mode 100644
index 000000000000..b62b1a94b234
--- /dev/null
+++ b/contrib/llvm/lib/Support/Threading.cpp
@@ -0,0 +1,116 @@
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1
+ assert(!multithreaded_mode && "Already multithreaded!");
+ multithreaded_mode = true;
+ global_lock = new sys::Mutex(true);
+
+ // We fence here to ensure that all initialization is complete BEFORE we
+ // return from llvm_start_multithreaded().
+ sys::MemoryFence();
+ return true;
+#else
+ return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1
+ assert(multithreaded_mode && "Not currently multithreaded!");
+
+  // We fence here to ensure that all threaded operations are complete BEFORE we
+ // return from llvm_stop_multithreaded().
+ sys::MemoryFence();
+
+ multithreaded_mode = false;
+ delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+ return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+ if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+ if (multithreaded_mode) global_lock->release();
+}
+
+#if defined(LLVM_MULTITHREADED) && LLVM_MULTITHREADED == 1 && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+
+struct ThreadInfo {
+ void (*UserFn)(void *);
+ void *UserData;
+};
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+ ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+ TI->UserFn(TI->UserData);
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ ThreadInfo Info = { Fn, UserData };
+ pthread_attr_t Attr;
+ pthread_t Thread;
+
+ // Construct the attributes object.
+ if (::pthread_attr_init(&Attr) != 0)
+ return;
+
+ // Set the requested stack size, if given.
+ if (RequestedStackSize != 0) {
+ if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+ goto error;
+ }
+
+ // Construct and execute the thread.
+ if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+ goto error;
+
+ // Wait for the thread and clean up.
+ ::pthread_join(Thread, 0);
+
+ error:
+ ::pthread_attr_destroy(&Attr);
+}
+
+#else
+
+// No non-pthread implementation, currently.
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ (void) RequestedStackSize;
+ Fn(UserData);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
new file mode 100644
index 000000000000..1a0f7bc36394
--- /dev/null
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -0,0 +1,57 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+
+void
+TimeValue::normalize( void ) {
+ if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+ } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+ }
+
+ if (seconds_ >= 1 && nanos_ < 0) {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } else if (seconds_ < 0 && nanos_ > 0) {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ }
+}
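+
+// e.g. (illustrative) normalizing (1 s, -200000000 ns) gives
+// (0 s, 800000000 ns), so seconds_ and nanos_ always end up with the same
+// sign (or zero).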
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/TimeValue.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
new file mode 100644
index 000000000000..a9ed5eecfa7e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -0,0 +1,393 @@
+//===-- Timer.cpp - Interval Timing Support -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interval Timing implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
+// of constructor/destructor ordering being unspecified by C++. Basically the
+// problem is that a Statistic object gets destroyed, which ends up calling
+// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
+// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
+// would get destroyed before the Statistic, causing havoc to ensue. We "fix"
+// this by creating the string the first time it is needed and never destroying
+// it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+ return *LibSupportInfoOutputFilename;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
+namespace {
+ static cl::opt<bool>
+ TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+ "tracking (this may be slow)"),
+ cl::Hidden);
+
+ static cl::opt<std::string, true>
+ InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+ cl::desc("File to append -stats and -timer output to"),
+ cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+raw_ostream *llvm::CreateInfoOutputFile() {
+ const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+ if (OutputFilename.empty())
+ return new raw_fd_ostream(2, false); // stderr.
+ if (OutputFilename == "-")
+ return new raw_fd_ostream(1, false); // stdout.
+
+ // Append mode is used because the info output file is opened and closed
+ // each time -stats or -time-passes wants to print output to it. To
+ // compensate for this, the test-suite Makefiles have code to delete the
+ // info output file before running commands which write to it.
+ std::string Error;
+ raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
+ Error, raw_fd_ostream::F_Append);
+ if (Error.empty())
+ return Result;
+
+ errs() << "Error opening info-output-file '"
+         << OutputFilename << "' for appending!\n";
+ delete Result;
+ return new raw_fd_ostream(2, false); // stderr.
+}
+
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+ TimerGroup *tmp = DefaultTimerGroup;
+ sys::MemoryFence();
+ if (tmp) return tmp;
+
+ llvm_acquire_global_lock();
+ tmp = DefaultTimerGroup;
+ if (!tmp) {
+ tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+ sys::MemoryFence();
+ DefaultTimerGroup = tmp;
+ }
+ llvm_release_global_lock();
+
+ return tmp;
+}
+
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
+
+void Timer::init(StringRef N) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = getDefaultTimerGroup();
+ TG->addTimer(*this);
+}
+
+void Timer::init(StringRef N, TimerGroup &tg) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = &tg;
+ TG->addTimer(*this);
+}
+
+Timer::~Timer() {
+ if (!TG) return; // Never initialized, or already cleared.
+ TG->removeTimer(*this);
+}
+
+static inline size_t getMemUsage() {
+ if (!TrackSpace) return 0;
+ return sys::Process::GetMallocUsage();
+}
+
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
+ TimeRecord Result;
+ sys::TimeValue now(0,0), user(0,0), sys(0,0);
+
+ if (Start) {
+ Result.MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now, user, sys);
+ } else {
+ sys::Process::GetTimeUsage(now, user, sys);
+ Result.MemUsed = getMemUsage();
+ }
+
+ Result.WallTime = now.seconds() + now.microseconds() / 1000000.0;
+ Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+ Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
+ return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+ Started = true;
+ ActiveTimers->push_back(this);
+ Time -= TimeRecord::getCurrentTime(true);
+}
+
+void Timer::stopTimer() {
+ Time += TimeRecord::getCurrentTime(false);
+
+ if (ActiveTimers->back() == this) {
+ ActiveTimers->pop_back();
+ } else {
+ std::vector<Timer*>::iterator I =
+ std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+ assert(I != ActiveTimers->end() && "stop but no startTimer?");
+ ActiveTimers->erase(I);
+ }
+}
+
+static void printVal(double Val, double Total, raw_ostream &OS) {
+ if (Total < 1e-7) // Avoid dividing by zero.
+ OS << " ----- ";
+ else {
+ OS << " " << format("%7.4f", Val) << " (";
+ OS << format("%5.1f", Val*100/Total) << "%)";
+ }
+}
+
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+ if (Total.getUserTime())
+ printVal(getUserTime(), Total.getUserTime(), OS);
+ if (Total.getSystemTime())
+ printVal(getSystemTime(), Total.getSystemTime(), OS);
+ if (Total.getProcessTime())
+ printVal(getProcessTime(), Total.getProcessTime(), OS);
+ printVal(getWallTime(), Total.getWallTime(), OS);
+
+ OS << " ";
+
+ if (Total.getMemUsed())
+ OS << format("%9lld", (long long)getMemUsed()) << " ";
+}
+
+
+//===----------------------------------------------------------------------===//
+// NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef StringMap<Timer> Name2TimerMap;
+
+class Name2PairMap {
+ StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+ ~Name2PairMap() {
+ for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator
+ I = Map.begin(), E = Map.end(); I != E; ++I)
+ delete I->second.first;
+ }
+
+ Timer &get(StringRef Name, StringRef GroupName) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+ if (!GroupEntry.first)
+ GroupEntry.first = new TimerGroup(GroupName);
+
+ Timer &T = GroupEntry.second[Name];
+ if (!T.isInitialized())
+ T.init(Name, *GroupEntry.first);
+ return T;
+ }
+};
+
+}
+
+static ManagedStatic<Name2TimerMap> NamedTimers;
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(StringRef Name) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ Timer &T = (*NamedTimers)[Name];
+ if (!T.isInitialized())
+ T.init(Name);
+ return T;
+}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
+
+//===----------------------------------------------------------------------===//
+// TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+/// TimerGroupList - This is the global list of TimerGroups, maintained by the
+/// TimerGroup ctor/dtor and is protected by the TimerLock lock.
+static TimerGroup *TimerGroupList = 0;
+
+TimerGroup::TimerGroup(StringRef name)
+ : Name(name.begin(), name.end()), FirstTimer(0) {
+
+ // Add the group to TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ if (TimerGroupList)
+ TimerGroupList->Prev = &Next;
+ Next = TimerGroupList;
+ Prev = &TimerGroupList;
+ TimerGroupList = this;
+}
+
+TimerGroup::~TimerGroup() {
+ // If the timer group is destroyed before the timers it owns are destroyed,
+ // accumulate and print the timing data.
+ while (FirstTimer != 0)
+ removeTimer(*FirstTimer);
+
+ // Remove the group from the TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ *Prev = Next;
+ if (Next)
+ Next->Prev = Prev;
+}
+
+
+void TimerGroup::removeTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // If the timer was started, move its data to TimersToPrint.
+ if (T.Started)
+ TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+
+ T.TG = 0;
+
+ // Unlink the timer from our list.
+ *T.Prev = T.Next;
+ if (T.Next)
+ T.Next->Prev = T.Prev;
+
+ // Print the report when all timers in this group are destroyed if some of
+ // them were started.
+ if (FirstTimer != 0 || TimersToPrint.empty())
+ return;
+
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ PrintQueuedTimers(*OutStream);
+ delete OutStream; // Close the file.
+}
+
+void TimerGroup::addTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // Add the timer to our list.
+ if (FirstTimer)
+ FirstTimer->Prev = &T.Next;
+ T.Next = FirstTimer;
+ T.Prev = &FirstTimer;
+ FirstTimer = &T;
+}
+
+void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
+ // Sort the timers in descending order by amount of time taken.
+ std::sort(TimersToPrint.begin(), TimersToPrint.end());
+
+ TimeRecord Total;
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ Total += TimersToPrint[i].first;
+
+ // Print out timing header.
+ OS << "===" << std::string(73, '-') << "===\n";
+ // Figure out how many spaces to indent TimerGroup name.
+ unsigned Padding = (80-Name.length())/2;
+ if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+ OS.indent(Padding) << Name << '\n';
+ OS << "===" << std::string(73, '-') << "===\n";
+
+ // If this is not a collection of ungrouped timers, print the total time.
+ // Ungrouped timers don't really make sense to add up. We still print the
+ // TOTAL line to make the percentages make sense.
+ if (this != DefaultTimerGroup) {
+ OS << " Total Execution Time: ";
+ OS << format("%5.4f", Total.getProcessTime()) << " seconds (";
+ OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
+ }
+ OS << '\n';
+
+ if (Total.getUserTime())
+ OS << " ---User Time---";
+ if (Total.getSystemTime())
+ OS << " --System Time--";
+ if (Total.getProcessTime())
+ OS << " --User+System--";
+ OS << " ---Wall Time---";
+ if (Total.getMemUsed())
+ OS << " ---Mem---";
+ OS << " --- Name ---\n";
+
+ // Loop through all of the timing data, printing it out.
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) {
+ const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1];
+ Entry.first.print(Total, OS);
+ OS << Entry.second << '\n';
+ }
+
+ Total.print(Total, OS);
+ OS << "Total\n\n";
+ OS.flush();
+
+ TimersToPrint.clear();
+}
+
+/// print - Print any started timers in this group and zero them.
+void TimerGroup::print(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // See if any of our timers were started, if so add them to TimersToPrint and
+ // reset them.
+ for (Timer *T = FirstTimer; T; T = T->Next) {
+ if (!T->Started) continue;
+ TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+
+ // Clear out the time.
+ T->Started = 0;
+ T->Time = TimeRecord();
+ }
+
+ // If any timers were started, print the group.
+ if (!TimersToPrint.empty())
+ PrintQueuedTimers(OS);
+}
+
+/// printAll - This static method prints all timers and clears them all out.
+void TimerGroup::printAll(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->print(OS);
+}
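
The Timer, NamedRegionTimer and TimerGroup pieces above are all driven by scoped objects behind the shared TimerLock. A minimal usage sketch, assuming the "llvm/Support/Timer.h" header that declares these classes (its NamedRegionTimer constructors default Enabled to true):

  #include "llvm/Support/Timer.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void expensiveWork() {
    volatile unsigned Sink = 0;               // stand-in for real work
    for (unsigned i = 0; i != 1000000; ++i)
      Sink += i;
  }

  int main() {
    {
      // Named, grouped region: the Timer is created lazily, registered in the
      // "frontend" TimerGroup, started here and stopped at scope exit.
      NamedRegionTimer T("parse", "frontend", true);
      expensiveWork();
    }

    // Explicit group: start/stop by hand, then print and reset the group.
    TimerGroup TG("demo timers");
    Timer T1("step 1", TG);
    T1.startTimer();
    expensiveWork();
    T1.stopTimer();
    TG.print(errs());
    return 0;
  }
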
diff --git a/contrib/llvm/lib/Support/ToolOutputFile.cpp b/contrib/llvm/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 000000000000..e7ca927ea537
--- /dev/null
+++ b/contrib/llvm/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,43 @@
+//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+using namespace llvm;
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
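
tool_output_file above pairs a raw_fd_ostream with a CleanupInstaller so the output file disappears on error or on a fatal signal unless the tool explicitly commits it. A hedged sketch of the intended calling pattern (flags left at 0; the os() and keep() accessors are assumed from the ToolOutputFile.h header included above):

  #include "llvm/Support/ToolOutputFile.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    std::string ErrorInfo;
    tool_output_file Out("output.ll", ErrorInfo, 0 /* no raw_fd_ostream flags */);
    if (!ErrorInfo.empty()) {
      errs() << ErrorInfo << '\n';
      return 1;              // nothing to clean up; Keep was set in the ctor
    }

    Out.os() << "; generated output\n";

    // Commit the file only after all output was written successfully;
    // otherwise the CleanupInstaller destructor deletes it.
    Out.keep();
    return 0;
  }
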
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
new file mode 100644
index 000000000000..7e094ee78f36
--- /dev/null
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -0,0 +1,640 @@
+//===--- Triple.cpp - Target triple helper class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//
+
+const char *Triple::getArchTypeName(ArchType Kind) {
+ switch (Kind) {
+ case InvalidArch: return "<invalid>";
+ case UnknownArch: return "unknown";
+
+ case alpha: return "alpha";
+ case arm: return "arm";
+ case bfin: return "bfin";
+ case cellspu: return "cellspu";
+ case mips: return "mips";
+ case mipsel: return "mipsel";
+ case msp430: return "msp430";
+ case ppc64: return "powerpc64";
+ case ppc: return "powerpc";
+ case sparc: return "sparc";
+ case sparcv9: return "sparcv9";
+ case systemz: return "s390x";
+ case tce: return "tce";
+ case thumb: return "thumb";
+ case x86: return "i386";
+ case x86_64: return "x86_64";
+ case xcore: return "xcore";
+ case mblaze: return "mblaze";
+ case ptx32: return "ptx32";
+ case ptx64: return "ptx64";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getArchTypePrefix(ArchType Kind) {
+ switch (Kind) {
+ default:
+ return 0;
+
+ case alpha: return "alpha";
+
+ case arm:
+ case thumb: return "arm";
+
+ case bfin: return "bfin";
+
+ case cellspu: return "spu";
+
+ case ppc64:
+ case ppc: return "ppc";
+
+ case mblaze: return "mblaze";
+
+ case sparcv9:
+ case sparc: return "sparc";
+
+ case x86:
+ case x86_64: return "x86";
+
+ case xcore: return "xcore";
+
+ case ptx32: return "ptx";
+ case ptx64: return "ptx";
+ }
+}
+
+const char *Triple::getVendorTypeName(VendorType Kind) {
+ switch (Kind) {
+ case UnknownVendor: return "unknown";
+
+ case Apple: return "apple";
+ case PC: return "pc";
+ case SCEI: return "scei";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getOSTypeName(OSType Kind) {
+ switch (Kind) {
+ case UnknownOS: return "unknown";
+
+ case AuroraUX: return "auroraux";
+ case Cygwin: return "cygwin";
+ case Darwin: return "darwin";
+ case DragonFly: return "dragonfly";
+ case FreeBSD: return "freebsd";
+ case IOS: return "ios";
+ case Linux: return "linux";
+ case Lv2: return "lv2";
+ case MacOSX: return "macosx";
+ case MinGW32: return "mingw32";
+ case NetBSD: return "netbsd";
+ case OpenBSD: return "openbsd";
+ case Psp: return "psp";
+ case Solaris: return "solaris";
+ case Win32: return "win32";
+ case Haiku: return "haiku";
+ case Minix: return "minix";
+ case RTEMS: return "rtems";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+ switch (Kind) {
+ case UnknownEnvironment: return "unknown";
+ case GNU: return "gnu";
+ case GNUEABI: return "gnueabi";
+ case EABI: return "eabi";
+ case MachO: return "macho";
+ }
+
+ return "<invalid>";
+}
+
+Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
+ if (Name == "alpha")
+ return alpha;
+ if (Name == "arm")
+ return arm;
+ if (Name == "bfin")
+ return bfin;
+ if (Name == "cellspu")
+ return cellspu;
+ if (Name == "mips")
+ return mips;
+ if (Name == "mipsel")
+ return mipsel;
+ if (Name == "msp430")
+ return msp430;
+ if (Name == "ppc64")
+ return ppc64;
+ if (Name == "ppc")
+ return ppc;
+ if (Name == "mblaze")
+ return mblaze;
+ if (Name == "sparc")
+ return sparc;
+ if (Name == "sparcv9")
+ return sparcv9;
+ if (Name == "systemz")
+ return systemz;
+ if (Name == "tce")
+ return tce;
+ if (Name == "thumb")
+ return thumb;
+ if (Name == "x86")
+ return x86;
+ if (Name == "x86-64")
+ return x86_64;
+ if (Name == "xcore")
+ return xcore;
+ if (Name == "ptx32")
+ return ptx32;
+ if (Name == "ptx64")
+ return ptx64;
+
+ return UnknownArch;
+}
+
+Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
+ // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
+ // archs which Darwin doesn't use.
+
+ // The matching this routine does is fairly pointless, since it is neither the
+ // complete architecture list, nor a reasonable subset. The problem is that
+ // historically the driver driver accepts this and also ties its -march=
+ // handling to the architecture name, so we need to be careful before removing
+ // support for it.
+
+ // This code must be kept in sync with Clang's Darwin specific argument
+ // translation.
+
+ if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" ||
+ Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" ||
+ Str == "ppc7450" || Str == "ppc970")
+ return Triple::ppc;
+
+ if (Str == "ppc64")
+ return Triple::ppc64;
+
+ if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" ||
+ Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" ||
+ Str == "pentIIm5" || Str == "pentium4")
+ return Triple::x86;
+
+ if (Str == "x86_64")
+ return Triple::x86_64;
+
+ // This is derived from the driver driver.
+ if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
+ Str == "armv6" || Str == "armv7")
+ return Triple::arm;
+
+ if (Str == "ptx32")
+ return Triple::ptx32;
+ if (Str == "ptx64")
+ return Triple::ptx64;
+
+ return Triple::UnknownArch;
+}
+
+// Returns architecture name that is understood by the target assembler.
+const char *Triple::getArchNameForAssembler() {
+ if (!isOSDarwin() && getVendor() != Triple::Apple)
+ return NULL;
+
+ StringRef Str = getArchName();
+ if (Str == "i386")
+ return "i386";
+ if (Str == "x86_64")
+ return "x86_64";
+ if (Str == "powerpc")
+ return "ppc";
+ if (Str == "powerpc64")
+ return "ppc64";
+ if (Str == "mblaze" || Str == "microblaze")
+ return "mblaze";
+ if (Str == "arm")
+ return "arm";
+ if (Str == "armv4t" || Str == "thumbv4t")
+ return "armv4t";
+ if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
+ || Str == "thumbv5e")
+ return "armv5";
+ if (Str == "armv6" || Str == "thumbv6")
+ return "armv6";
+ if (Str == "armv7" || Str == "thumbv7")
+ return "armv7";
+ if (Str == "ptx32")
+ return "ptx32";
+ if (Str == "ptx64")
+ return "ptx64";
+ return NULL;
+}
+
+//
+
+Triple::ArchType Triple::ParseArch(StringRef ArchName) {
+ if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+ ArchName[2] == '8' && ArchName[3] == '6' &&
+ ArchName[1] - '3' < 6) // i[3-9]86
+ return x86;
+ else if (ArchName == "amd64" || ArchName == "x86_64")
+ return x86_64;
+ else if (ArchName == "bfin")
+ return bfin;
+ else if (ArchName == "powerpc")
+ return ppc;
+ else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
+ return ppc64;
+ else if (ArchName == "mblaze")
+ return mblaze;
+ else if (ArchName == "arm" ||
+ ArchName.startswith("armv") ||
+ ArchName == "xscale")
+ return arm;
+ else if (ArchName == "thumb" ||
+ ArchName.startswith("thumbv"))
+ return thumb;
+ else if (ArchName.startswith("alpha"))
+ return alpha;
+ else if (ArchName == "spu" || ArchName == "cellspu")
+ return cellspu;
+ else if (ArchName == "msp430")
+ return msp430;
+ else if (ArchName == "mips" || ArchName == "mipseb" ||
+ ArchName == "mipsallegrex")
+ return mips;
+ else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
+ ArchName == "psp")
+ return mipsel;
+ else if (ArchName == "sparc")
+ return sparc;
+ else if (ArchName == "sparcv9")
+ return sparcv9;
+ else if (ArchName == "s390x")
+ return systemz;
+ else if (ArchName == "tce")
+ return tce;
+ else if (ArchName == "xcore")
+ return xcore;
+ else if (ArchName == "ptx32")
+ return ptx32;
+ else if (ArchName == "ptx64")
+ return ptx64;
+ else
+ return UnknownArch;
+}
+
+Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
+ if (VendorName == "apple")
+ return Apple;
+ else if (VendorName == "pc")
+ return PC;
+ else if (VendorName == "scei")
+ return SCEI;
+ else
+ return UnknownVendor;
+}
+
+Triple::OSType Triple::ParseOS(StringRef OSName) {
+ if (OSName.startswith("auroraux"))
+ return AuroraUX;
+ else if (OSName.startswith("cygwin"))
+ return Cygwin;
+ else if (OSName.startswith("darwin"))
+ return Darwin;
+ else if (OSName.startswith("dragonfly"))
+ return DragonFly;
+ else if (OSName.startswith("freebsd"))
+ return FreeBSD;
+ else if (OSName.startswith("ios"))
+ return IOS;
+ else if (OSName.startswith("linux"))
+ return Linux;
+ else if (OSName.startswith("lv2"))
+ return Lv2;
+ else if (OSName.startswith("macosx"))
+ return MacOSX;
+ else if (OSName.startswith("mingw32"))
+ return MinGW32;
+ else if (OSName.startswith("netbsd"))
+ return NetBSD;
+ else if (OSName.startswith("openbsd"))
+ return OpenBSD;
+ else if (OSName.startswith("psp"))
+ return Psp;
+ else if (OSName.startswith("solaris"))
+ return Solaris;
+ else if (OSName.startswith("win32"))
+ return Win32;
+ else if (OSName.startswith("haiku"))
+ return Haiku;
+ else if (OSName.startswith("minix"))
+ return Minix;
+ else if (OSName.startswith("rtems"))
+ return RTEMS;
+ else
+ return UnknownOS;
+}
+
+Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
+ if (EnvironmentName.startswith("eabi"))
+ return EABI;
+ else if (EnvironmentName.startswith("gnueabi"))
+ return GNUEABI;
+ else if (EnvironmentName.startswith("gnu"))
+ return GNU;
+ else if (EnvironmentName.startswith("macho"))
+ return MachO;
+ else
+ return UnknownEnvironment;
+}
+
+void Triple::Parse() const {
+ assert(!isInitialized() && "Invalid parse call.");
+
+ Arch = ParseArch(getArchName());
+ Vendor = ParseVendor(getVendorName());
+ OS = ParseOS(getOSName());
+ Environment = ParseEnvironment(getEnvironmentName());
+
+ assert(isInitialized() && "Failed to initialize!");
+}
+
+std::string Triple::normalize(StringRef Str) {
+ // Parse into components.
+ SmallVector<StringRef, 4> Components;
+ for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
+ Last = Str.find('-', First);
+ Components.push_back(Str.slice(First, Last));
+ }
+
+ // If the first component corresponds to a known architecture, preferentially
+ // use it for the architecture. If the second component corresponds to a
+ // known vendor, preferentially use it for the vendor, etc. This avoids silly
+ // component movement when a component parses as (eg) both a valid arch and a
+ // valid os.
+ ArchType Arch = UnknownArch;
+ if (Components.size() > 0)
+ Arch = ParseArch(Components[0]);
+ VendorType Vendor = UnknownVendor;
+ if (Components.size() > 1)
+ Vendor = ParseVendor(Components[1]);
+ OSType OS = UnknownOS;
+ if (Components.size() > 2)
+ OS = ParseOS(Components[2]);
+ EnvironmentType Environment = UnknownEnvironment;
+ if (Components.size() > 3)
+ Environment = ParseEnvironment(Components[3]);
+
+ // Note which components are already in their final position. These will not
+ // be moved.
+ bool Found[4];
+ Found[0] = Arch != UnknownArch;
+ Found[1] = Vendor != UnknownVendor;
+ Found[2] = OS != UnknownOS;
+ Found[3] = Environment != UnknownEnvironment;
+
+ // If they are not there already, permute the components into their canonical
+ // positions by seeing if they parse as a valid architecture, and if so moving
+ // the component to the architecture position etc.
+ for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
+ if (Found[Pos])
+ continue; // Already in the canonical position.
+
+ for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
+ // Do not reparse any components that already matched.
+ if (Idx < array_lengthof(Found) && Found[Idx])
+ continue;
+
+ // Does this component parse as valid for the target position?
+ bool Valid = false;
+ StringRef Comp = Components[Idx];
+ switch (Pos) {
+ default:
+ assert(false && "unexpected component type!");
+ case 0:
+ Arch = ParseArch(Comp);
+ Valid = Arch != UnknownArch;
+ break;
+ case 1:
+ Vendor = ParseVendor(Comp);
+ Valid = Vendor != UnknownVendor;
+ break;
+ case 2:
+ OS = ParseOS(Comp);
+ Valid = OS != UnknownOS;
+ break;
+ case 3:
+ Environment = ParseEnvironment(Comp);
+ Valid = Environment != UnknownEnvironment;
+ break;
+ }
+ if (!Valid)
+ continue; // Nope, try the next component.
+
+ // Move the component to the target position, pushing any non-fixed
+ // components that are in the way to the right. This tends to give
+ // good results in the common cases of a forgotten vendor component
+ // or a wrongly positioned environment.
+ if (Pos < Idx) {
+ // Insert left, pushing the existing components to the right. For
+ // example, a-b-i386 -> i386-a-b when moving i386 to the front.
+ StringRef CurrentComponent(""); // The empty component.
+ // Replace the component we are moving with an empty component.
+ std::swap(CurrentComponent, Components[Idx]);
+ // Insert the component being moved at Pos, displacing any existing
+ // components to the right.
+ for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
+ // Skip over any fixed components.
+ while (i < array_lengthof(Found) && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ }
+ } else if (Pos > Idx) {
+ // Push right by inserting empty components until the component at Idx
+ // reaches the target position Pos. For example, pc-a -> -pc-a when
+ // moving pc to the second position.
+ do {
+ // Insert one empty component at Idx.
+ StringRef CurrentComponent(""); // The empty component.
+ for (unsigned i = Idx; i < Components.size();) {
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ // If it was placed on top of an empty component then we are done.
+ if (CurrentComponent.empty())
+ break;
+ // Advance to the next component, skipping any fixed components.
+ while (++i < array_lengthof(Found) && Found[i])
+ ;
+ }
+ // The last component was pushed off the end - append it.
+ if (!CurrentComponent.empty())
+ Components.push_back(CurrentComponent);
+
+ // Advance Idx to the component's new position.
+ while (++Idx < array_lengthof(Found) && Found[Idx]) {}
+ } while (Idx < Pos); // Add more until the final position is reached.
+ }
+ assert(Pos < Components.size() && Components[Pos] == Comp &&
+ "Component moved wrong!");
+ Found[Pos] = true;
+ break;
+ }
+ }
+
+ // Special case logic goes here. At this point Arch, Vendor and OS have the
+ // correct values for the computed components.
+
+ // Stick the corrected components back together to form the normalized string.
+ std::string Normalized;
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ if (i) Normalized += '-';
+ Normalized += Components[i];
+ }
+ return Normalized;
+}
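
The permutation logic above only reorders recognized components and pads with empty strings; it never invents values. A hedged sketch of what normalize() yields for the examples mentioned in the comments:

  #include "llvm/ADT/Triple.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // A recognized component is moved into its canonical slot, displacing the
    // unknown ones to the right: "a-b-i386" -> "i386-a-b".
    assert(Triple::normalize("a-b-i386") == "i386-a-b");

    // Slots to the left of a recognized component are filled with empty
    // strings: "pc-a" -> "-pc-a", since "pc" is a vendor and lands second.
    assert(Triple::normalize("pc-a") == "-pc-a");

    // Already-canonical triples pass through unchanged.
    assert(Triple::normalize("x86_64-pc-linux-gnu") == "x86_64-pc-linux-gnu");
    return 0;
  }
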
+
+StringRef Triple::getArchName() const {
+ return StringRef(Data).split('-').first; // Isolate first component
+}
+
+StringRef Triple::getVendorName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').first; // Isolate second component
+}
+
+StringRef Triple::getOSName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').first; // Isolate third component
+}
+
+StringRef Triple::getEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ Tmp = Tmp.split('-').second; // Strip second component
+ return Tmp.split('-').second; // Strip third component
+}
+
+StringRef Triple::getOSAndEnvironmentName() const {
+ StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
+ return Tmp.split('-').second; // Strip second component
+}
+
+static unsigned EatNumber(StringRef &Str) {
+ assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
+ unsigned Result = 0;
+
+ do {
+ // Consume the leading digit.
+ Result = Result*10 + (Str[0] - '0');
+
+ // Eat the digit.
+ Str = Str.substr(1);
+ } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9');
+
+ return Result;
+}
+
+void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ StringRef OSName = getOSName();
+
+ // Assume that the OS portion of the triple starts with the canonical name.
+ StringRef OSTypeName = getOSTypeName(getOS());
+ if (OSName.startswith(OSTypeName))
+ OSName = OSName.substr(OSTypeName.size());
+
+ // Any unset version defaults to 0.
+ Major = Minor = Micro = 0;
+
+ // Parse up to three components.
+ unsigned *Components[3] = { &Major, &Minor, &Micro };
+ for (unsigned i = 0; i != 3; ++i) {
+ if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+ break;
+
+ // Consume the leading number.
+ *Components[i] = EatNumber(OSName);
+
+ // Consume the separator, if present.
+ if (OSName.startswith("."))
+ OSName = OSName.substr(1);
+ }
+}
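
getOSVersion() above strips the canonical OS name and then consumes up to three dot-separated numbers, leaving the rest at zero. A hedged sketch (the Triple constructor taking the full triple string is assumed from the header):

  #include "llvm/ADT/Triple.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    Triple T("x86_64-apple-darwin10.4.0");
    unsigned Major, Minor, Micro;
    T.getOSVersion(Major, Minor, Micro);
    assert(Major == 10 && Minor == 4 && Micro == 0);

    Triple U("i386-unknown-freebsd9");   // only the major version is present
    U.getOSVersion(Major, Minor, Micro);
    assert(Major == 9 && Minor == 0 && Micro == 0);
    return 0;
  }
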
+
+void Triple::setTriple(const Twine &Str) {
+ Data = Str.str();
+ Arch = InvalidArch;
+}
+
+void Triple::setArch(ArchType Kind) {
+ setArchName(getArchTypeName(Kind));
+}
+
+void Triple::setVendor(VendorType Kind) {
+ setVendorName(getVendorTypeName(Kind));
+}
+
+void Triple::setOS(OSType Kind) {
+ setOSName(getOSTypeName(Kind));
+}
+
+void Triple::setEnvironment(EnvironmentType Kind) {
+ setEnvironmentName(getEnvironmentTypeName(Kind));
+}
+
+void Triple::setArchName(StringRef Str) {
+ // Work around a miscompilation bug for Twines in gcc 4.0.3.
+ SmallString<64> Triple;
+ Triple += Str;
+ Triple += "-";
+ Triple += getVendorName();
+ Triple += "-";
+ Triple += getOSAndEnvironmentName();
+ setTriple(Triple.str());
+}
+
+void Triple::setVendorName(StringRef Str) {
+ setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(StringRef Str) {
+ if (hasEnvironment())
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+ "-" + getEnvironmentName());
+ else
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+ "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
new file mode 100644
index 000000000000..d62123cc985e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -0,0 +1,165 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+ // If we're storing only a std::string, just return it.
+ if (LHSKind == StdStringKind && RHSKind == EmptyKind)
+ return *static_cast<const std::string*>(LHS);
+
+ // Otherwise, flatten and copy the contents first.
+ SmallString<256> Vec;
+ return toStringRef(Vec).str();
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+ raw_svector_ostream OS(Out);
+ print(OS);
+}
+
+StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
+ if (isSingleStringRef())
+ return getSingleStringRef();
+ toVector(Out);
+ return StringRef(Out.data(), Out.size());
+}
+
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+ if (isUnary()) {
+ switch (getLHSKind()) {
+ case CStringKind:
+ // Already null terminated, yay!
+ return StringRef(static_cast<const char*>(LHS));
+ case StdStringKind: {
+ const std::string *str = static_cast<const std::string*>(LHS);
+ return StringRef(str->c_str(), str->size());
+ }
+ default:
+ break;
+ }
+ }
+ toVector(Out);
+ Out.push_back(0);
+ Out.pop_back();
+ return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind: break;
+ case Twine::EmptyKind: break;
+ case Twine::TwineKind:
+ static_cast<const Twine*>(Ptr)->print(OS);
+ break;
+ case Twine::CStringKind:
+ OS << static_cast<const char*>(Ptr);
+ break;
+ case Twine::StdStringKind:
+ OS << *static_cast<const std::string*>(Ptr);
+ break;
+ case Twine::StringRefKind:
+ OS << *static_cast<const StringRef*>(Ptr);
+ break;
+ case Twine::DecUIKind:
+ OS << (unsigned)(uintptr_t)Ptr;
+ break;
+ case Twine::DecIKind:
+ OS << (int)(intptr_t)Ptr;
+ break;
+ case Twine::DecULKind:
+ OS << *static_cast<const unsigned long*>(Ptr);
+ break;
+ case Twine::DecLKind:
+ OS << *static_cast<const long*>(Ptr);
+ break;
+ case Twine::DecULLKind:
+ OS << *static_cast<const unsigned long long*>(Ptr);
+ break;
+ case Twine::DecLLKind:
+ OS << *static_cast<const long long*>(Ptr);
+ break;
+ case Twine::UHexKind:
+ OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ break;
+ }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+ NodeKind Kind) const {
+ switch (Kind) {
+ case Twine::NullKind:
+ OS << "null"; break;
+ case Twine::EmptyKind:
+ OS << "empty"; break;
+ case Twine::TwineKind:
+ OS << "rope:";
+ static_cast<const Twine*>(Ptr)->printRepr(OS);
+ break;
+ case Twine::CStringKind:
+ OS << "cstring:\""
+ << static_cast<const char*>(Ptr) << "\"";
+ break;
+ case Twine::StdStringKind:
+ OS << "std::string:\""
+ << *static_cast<const std::string*>(Ptr) << "\"";
+ break;
+ case Twine::StringRefKind:
+ OS << "stringref:\""
+ << *static_cast<const StringRef*>(Ptr) << "\"";
+ break;
+ case Twine::DecUIKind:
+ OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
+ break;
+ case Twine::DecIKind:
+ OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
+ break;
+ case Twine::DecULKind:
+ OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLKind:
+ OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+ break;
+ case Twine::DecULLKind:
+ OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+ break;
+ case Twine::DecLLKind:
+ OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+ break;
+ case Twine::UHexKind:
+ OS << "uhex:\"";
+ OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ OS << "\"";
+ break;
+ }
+}
+
+void Twine::print(raw_ostream &OS) const {
+ printOneChild(OS, LHS, getLHSKind());
+ printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+ OS << "(Twine ";
+ printOneChildRepr(OS, LHS, getLHSKind());
+ OS << " ";
+ printOneChildRepr(OS, RHS, getRHSKind());
+ OS << ")";
+}
+
+void Twine::dump() const {
+ print(llvm::dbgs());
+}
+
+void Twine::dumpRepr() const {
+ printRepr(llvm::dbgs());
+}
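
Twine is meant to be used only as a short-lived temporary that is flattened exactly once by the callee. A hedged sketch of the usual pattern, built on the toStringRef() shown above:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Takes the concatenation by const reference; nothing is copied until the
  // callee decides to flatten the rope.
  static void report(const Twine &Msg) {
    SmallString<128> Storage;
    // toStringRef() only copies into Storage when the Twine is not already a
    // single contiguous string.
    errs() << Msg.toStringRef(Storage) << '\n';
  }

  int main() {
    int Line = 42;
    report("error at line " + Twine(Line) + ": something went wrong");
    return 0;
  }
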
diff --git a/contrib/llvm/lib/Support/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc
new file mode 100644
index 000000000000..eacb08b3fb00
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Host.inc
@@ -0,0 +1,69 @@
+//===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+ struct utsname info;
+
+ if (uname(&info))
+ return "";
+
+ return info.release;
+}
+
+std::string sys::getHostTriple() {
+#ifdef __FreeBSD__
+ return LLVM_HOSTTRIPLE;
+#else
+ // FIXME: Derive directly instead of relying on the autoconf generated
+ // variable.
+
+ StringRef HostTripleString(LLVM_HOSTTRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
+
+ // Normalize the arch, since the host triple may not actually match the host.
+ std::string Arch = ArchSplit.first;
+
+ std::string Triple(Arch);
+ Triple += '-';
+ Triple += ArchSplit.second;
+
+ // Force i<N>86 to i386.
+ if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+ Triple[2] == '8' && Triple[3] == '6')
+ Triple[1] = '3';
+
+ // On darwin, we want to update the version to match that of the
+ // host.
+ std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+ if (DarwinDashIdx != std::string::npos) {
+ Triple.resize(DarwinDashIdx + strlen("-darwin"));
+ Triple += getOSVersion();
+ }
+
+ return Triple;
+#endif
+}
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
new file mode 100644
index 000000000000..5a57a2870636
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -0,0 +1,151 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions. This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it. Getting this type of memory
+/// is very OS specific.
+///
+llvm::sys::MemoryBlock
+llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+ int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+ static int zero_fd = open("/dev/zero", O_RDWR);
+ if (zero_fd == -1) {
+ MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
+ return MemoryBlock();
+ }
+ fd = zero_fd;
+#endif
+
+ int flags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+ MAP_ANONYMOUS
+#else
+ MAP_ANON
+#endif
+ ;
+
+ void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+ NearBlock->size() : 0;
+
+#if defined(__APPLE__) && defined(__arm__)
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
+ flags, fd, 0);
+#else
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+ flags, fd, 0);
+#endif
+ if (pa == MAP_FAILED) {
+ if (NearBlock) //Try again without a near hint
+ return AllocateRWX(NumBytes, 0);
+
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
+ return MemoryBlock();
+ }
+
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect max RX failed");
+ return sys::MemoryBlock();
+ }
+
+ kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect RW failed");
+ return sys::MemoryBlock();
+ }
+#endif
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+
+ return result;
+}
+
+bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (0 != ::munmap(M.Address, M.Size))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
+ return false;
+}
+
+bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
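
AllocateRWX/setWritable/setExecutable/ReleaseRWX above are the primitives the JIT uses for code memory. A hedged sketch of the intended sequence (the "llvm/Support/Memory.h" header declaring sys::Memory and sys::MemoryBlock is assumed, and the payload byte is only illustrative):

  #include "llvm/Support/Memory.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cstring>
  using namespace llvm;

  int main() {
    std::string Err;
    // Rounded up to whole pages internally.
    sys::MemoryBlock MB = sys::Memory::AllocateRWX(64, 0, &Err);
    if (MB.base() == 0) {
      errs() << "AllocateRWX failed: " << Err << '\n';
      return 1;
    }

    // On ARM/Darwin the block is not writable until asked; elsewhere these
    // calls are no-ops that simply return true.
    sys::Memory::setWritable(MB, &Err);
    unsigned char Ret = 0xc3;                  // x86 'ret', stand-in payload
    std::memcpy(MB.base(), &Ret, sizeof(Ret));
    sys::Memory::setExecutable(MB, &Err);

    sys::Memory::ReleaseRWX(MB, &Err);
    return 0;
  }
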
diff --git a/contrib/llvm/lib/Support/Unix/Mutex.inc b/contrib/llvm/lib/Support/Unix/Mutex.inc
new file mode 100644
index 000000000000..fe6b17041457
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Mutex.inc
@@ -0,0 +1,43 @@
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+MutexImpl::MutexImpl( bool recursive)
+{
+}
+
+MutexImpl::~MutexImpl()
+{
+}
+
+bool
+MutexImpl::release()
+{
+ return true;
+}
+
+bool
+MutexImpl::tryacquire( void )
+{
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
new file mode 100644
index 000000000000..f295b92e4a5b
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -0,0 +1,890 @@
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+ return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+const char sys::PathSeparator = ':';
+
+StringRef Path::GetEXESuffix() {
+ return StringRef();
+}
+
+Path::Path(StringRef p)
+ : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ // Empty paths are considered invalid here.
+ // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+ // LLVM manipulates Paths with fixed-size arrays, and if the OS can't
+ // handle names longer than some limit, it'll report this on demand using
+ // ENAMETOOLONG.
+ return !path.empty();
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ if (NameLen == 0)
+ return false;
+ return NameStart[0] == '/';
+}
+
+bool
+Path::isAbsolute() const {
+ if (path.empty())
+ return false;
+ return path[0] == '/';
+}
+
+Path
+Path::GetRootDirectory() {
+ Path result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+ // The best way is with mkdtemp, but that's not available on many systems;
+ // Linux and FreeBSD have it. Others probably won't.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ if (0 == mkdtemp(pathname)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKSTEMP)
+ // If no mkdtemp is available, mkstemp can be used to create a temporary file
+ // which is then removed and created as a directory. We prefer this over
+ // mktemp because of mktemp's inherent security and threading risks. We still
+ // have a slight race condition from the time the temporary file is created to
+ // the time it is re-created as a directory.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ int fd = 0;
+ if (-1 == (fd = mkstemp(pathname))) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ ::close(fd);
+ ::unlink(pathname); // start race condition, ignore errors
+ if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#elif defined(HAVE_MKTEMP)
+ // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+ // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+ // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+ // the XXXXXX with the pid of the process and a letter. That leads to only
+ // twenty six temporary files that can be generated.
+ char pathname[] = "/tmp/llvm_XXXXXX";
+ char *TmpName = ::mktemp(pathname);
+ if (TmpName == 0) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create unique directory name");
+ return Path();
+ }
+ if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(TmpName);
+#else
+ // This is the worst case implementation. tempnam(3) leaks memory unless it's
+ // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+ // issues. The mktemp(3) function doesn't have enough variability in the
+ // temporary name generated. So, we provide our own implementation that
+ // increments an integer seeded with the current time. This
+ // should be sufficiently unique that we don't have many collisions between
+ // processes. Generally LLVM processes don't run very long and don't use very
+ // many temporary files so this shouldn't be a big issue for LLVM.
+ static time_t num = ::time(0);
+ char pathname[MAXPATHLEN];
+ do {
+ num++;
+ sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+ } while ( 0 == access(pathname, F_OK ) );
+ if (-1 == ::mkdir(pathname, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ return Path(pathname);
+#endif
+}
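
All four fallbacks above end the same way: either a freshly created, caller-owned directory under /tmp, or an invalid Path plus an error string. A hedged usage sketch (the "llvm/Support/Path.h" header declaring sys::Path is assumed):

  #include "llvm/Support/Path.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    std::string Err;
    sys::Path TmpDir = sys::Path::GetTemporaryDirectory(&Err);
    if (!TmpDir.isValid()) {
      errs() << Err << '\n';
      return 1;
    }
    errs() << "scratch directory: " << TmpDir.str() << '\n';

    // The caller owns the directory; remove it and its contents when done.
    TmpDir.eraseFromDisk(true, &Err);
    return 0;
  }
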
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+ char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#endif
+ // FIXME: Should this look at LD_LIBRARY_PATH too?
+ Paths.push_back(sys::Path("/usr/local/lib/"));
+ Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+ Paths.push_back(sys::Path("/usr/lib/"));
+ Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ const char* home = getenv("HOME");
+ Path result;
+ if (home && result.set(home))
+ return result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAXPATHLEN];
+ if (!getcwd(pathname,MAXPATHLEN)) {
+ assert (false && "Could not query current working directory.");
+ return Path();
+ }
+
+ return Path(pathname);
+}
+
+#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+ const char *dir, const char *bin)
+{
+ struct stat sb;
+
+ snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
+ if (realpath(buf, ret) == NULL)
+ return (1);
+ if (stat(buf, &sb) != 0)
+ return (1);
+
+ return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+ char *pv, *s, *t, buf[PATH_MAX];
+
+ /* First approach: absolute path. */
+ if (bin[0] == '/') {
+ if (test_dir(buf, ret, "/", bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Second approach: relative path. */
+ if (strchr(bin, '/') != NULL) {
+ if (getcwd(buf, PATH_MAX) == NULL)
+ return (NULL);
+ if (test_dir(buf, ret, buf, bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Third approach: $PATH */
+ if ((pv = getenv("PATH")) == NULL)
+ return (NULL);
+ s = pv = strdup(pv);
+ if (pv == NULL)
+ return (NULL);
+ while ((t = strsep(&s, ":")) != NULL) {
+ if (test_dir(buf, ret, t, bin) == 0) {
+ free(pv);
+ return (ret);
+ }
+ }
+ free(pv);
+ return (NULL);
+}
+#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __minix
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__APPLE__)
+ // On OS X the executable path is saved to the stack by dyld. Reading it
+ // from there is much faster than calling dladdr, especially for large
+ // binaries with symbols.
+ char exe_path[MAXPATHLEN];
+ uint32_t size = sizeof(exe_path);
+ if (_NSGetExecutablePath(exe_path, &size) == 0) {
+ char link_path[MAXPATHLEN];
+ if (realpath(exe_path, link_path))
+ return Path(link_path);
+ }
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
+ char exe_path[PATH_MAX];
+
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+#elif defined(__linux__) || defined(__CYGWIN__)
+ char exe_path[MAXPATHLEN];
+ ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
+ if (len >= 0)
+ return Path(StringRef(exe_path, len));
+#elif defined(HAVE_DLFCN_H)
+ // Use dladdr to get executable path if available.
+ Dl_info DLInfo;
+ int err = dladdr(MainAddr, &DLInfo);
+ if (err == 0)
+ return Path();
+
+ // If the filename is a symlink, we need to resolve and return the location of
+ // the actual executable.
+ char link_path[MAXPATHLEN];
+ if (realpath(DLInfo.dli_fname, link_path))
+ return Path(link_path);
+#else
+#error GetMainExecutable is not implemented on this host yet.
+#endif
+ return Path();
+}
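
The usual way to call GetMainExecutable is to hand it argv[0] plus the address of some symbol that lives in the main executable, so the dladdr() fallback has something to resolve. A hedged sketch following that convention:

  #include "llvm/Support/Path.h"
  #include "llvm/Support/raw_ostream.h"
  #include <stdint.h>
  using namespace llvm;

  static sys::Path getExecutablePath(const char *Argv0) {
    // Any symbol in this binary works as the anchor for the dladdr() path;
    // the address of this function is the conventional choice.
    void *P = (void*)(intptr_t)getExecutablePath;
    return sys::Path::GetMainExecutable(Argv0, P);
  }

  int main(int argc, char **argv) {
    (void)argc;
    sys::Path Exe = getExecutablePath(argv[0]);
    if (Exe.isValid())
      errs() << "running as: " << Exe.str() << '\n';
    return 0;
  }
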
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef();
+ else
+ return StringRef(path).substr(dot + 1);
+}
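
getDirname/getBasename/getSuffix (and getLast further down) split a path purely textually; nothing touches the filesystem. A hedged sketch of what the accessors yield:

  #include "llvm/Support/Path.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    sys::Path P("/usr/local/lib/libfoo.so");
    assert(P.getDirname()  == "/usr/local/lib");
    assert(P.getBasename() == "libfoo");      // last component minus suffix
    assert(P.getSuffix()   == "so");          // text after the final '.'
    assert(P.getLast()     == "libfoo.so");   // final component, suffix kept
    return 0;
  }
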
+
+bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char Buf[1025];
+ int fd = ::open(path.c_str(), O_RDONLY);
+ if (fd < 0)
+ return false;
+ ssize_t bytes_read = ::read(fd, Buf, len);
+ ::close(fd);
+ if (ssize_t(len) != bytes_read)
+ return false;
+ Magic.assign(Buf, len);
+ return true;
+}
+
+bool
+Path::exists() const {
+ return 0 == access(path.c_str(), F_OK );
+}
+
+bool
+Path::isDirectory() const {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ return ((buf.st_mode & S_IFMT) == S_IFDIR) ? true : false;
+}
+
+bool
+Path::isSymLink() const {
+ struct stat buf;
+ if (0 != lstat(path.c_str(), &buf))
+ return false;
+ return S_ISLNK(buf.st_mode);
+}
+
+
+bool
+Path::canRead() const {
+ return 0 == access(path.c_str(), R_OK);
+}
+
+bool
+Path::canWrite() const {
+ return 0 == access(path.c_str(), W_OK);
+}
+
+bool
+Path::isRegularFile() const {
+ // Get the status so we can determine if it's a file or directory
+ struct stat buf;
+
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+
+ if (S_ISREG(buf.st_mode))
+ return true;
+
+ return false;
+}
+
+bool
+Path::canExecute() const {
+ if (0 != access(path.c_str(), R_OK | X_OK ))
+ return false;
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ if (!S_ISREG(buf.st_mode))
+ return false;
+ return true;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash
+ if (pos == path.length()-1) {
+ // Find the second to last slash
+ size_t pos2 = path.rfind('/', pos-1);
+ if (pos2 == std::string::npos)
+ return StringRef(path).substr(0,pos);
+ else
+ return StringRef(path).substr(pos2+1,pos-pos2-1);
+ }
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return 0;
+ }
+ status.fileSize = buf.st_size;
+ status.modTime.fromEpochTime(buf.st_mtime);
+ status.mode = buf.st_mode;
+ status.user = buf.st_uid;
+ status.group = buf.st_gid;
+ status.uniqueID = uint64_t(buf.st_ino);
+ status.isDir = S_ISDIR(buf.st_mode);
+ status.isFile = S_ISREG(buf.st_mode);
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+static bool AddPermissionBits(const Path &File, int bits) {
+ // Get the umask value from the operating system. We want to use it
+ // when changing the file's permissions. Since calling umask() sets
+ // the umask and returns its old value, we must call it a second
+ // time to reset it to the user's preference.
+ int mask = umask(0777); // The arg. to umask is arbitrary.
+ umask(mask); // Restore the umask.
+
+ // Get the file's current mode.
+ struct stat buf;
+ if (0 != stat(File.c_str(), &buf))
+ return false;
+ // Change the file to have whichever permissions bits from 'bits'
+ // that the umask would not disable.
+ if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+ return false;
+ return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0444))
+ return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0222))
+ return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0111))
+ return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ DIR* direntries = ::opendir(path.c_str());
+ if (direntries == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+ std::string dirPath = path;
+ if (!lastIsSlash(dirPath))
+ dirPath += '/';
+
+ result.clear();
+ struct dirent* de = ::readdir(direntries);
+ for ( ; de != 0; de = ::readdir(direntries)) {
+ if (de->d_name[0] != '.') {
+ Path aPath(dirPath + (const char*)de->d_name);
+ struct stat st;
+ if (0 != stat(aPath.path.c_str(), &st)) {
+ // stat() failed; if lstat() still succeeds this is a dangling symlink,
+ // which we silently skip. Otherwise report the error.
+ if (0 == lstat(aPath.path.c_str(), &st) && S_ISLNK(st.st_mode))
+ continue;
+ return MakeErrMsg(ErrMsg,
+ aPath.path + ": can't determine file object type");
+ }
+ result.insert(aPath);
+ }
+ }
+
+ closedir(direntries);
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ path = a_path;
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ if (!lastIsSlash(path))
+ path += '/';
+ path += name;
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == 0 || slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ if (slashpos == path.size() - 1)
+ slashpos = path.rfind('/',slashpos-1);
+ if (slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ path.erase(slashpos);
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ path.erase(dotpos, path.size()-dotpos);
+ return true;
+ }
+ }
+ return false;
+}
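
set/appendComponent/eraseComponent/eraseSuffix edit the stored string in place and report whether anything changed. A hedged sketch of chaining them:

  #include "llvm/Support/Path.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    sys::Path P;
    P.set("/tmp/build");
    P.appendComponent("module.bc");        // "/tmp/build/module.bc"
    assert(P.str() == "/tmp/build/module.bc");

    P.eraseSuffix();                       // "/tmp/build/module"
    assert(P.str() == "/tmp/build/module");

    P.eraseComponent();                    // "/tmp/build"
    assert(P.str() == "/tmp/build");
    return 0;
  }
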
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+ if (access(beg, R_OK | W_OK) == 0)
+ return false;
+
+ if (create_parents) {
+
+ char* c = end;
+
+ for (; c != beg; --c)
+ if (*c == '/') {
+
+ // Recurse to handle the parent directory.
+ *c = '\0';
+ bool x = createDirectoryHelper(beg, c, create_parents);
+ *c = '/';
+
+ // Return if we encountered an error.
+ if (x)
+ return true;
+
+ break;
+ }
+ }
+
+ return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
+}
+
+bool
+Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
+ // Get a writeable copy of the path name
+ std::string pathname(path);
+
+ // Null-terminate the last component
+ size_t lastchar = path.length() - 1 ;
+
+ if (pathname[lastchar] != '/')
+ ++lastchar;
+
+ pathname[lastchar] = '\0';
+
+ if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents))
+ return MakeErrMsg(ErrMsg, pathname + ": can't create directory");
+
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ if (makeUnique( reuse_current, ErrMsg ))
+ return true;
+
+ // create the file
+ int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ // Get the status so we can determine if it's a file or directory.
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return true;
+ }
+
+ // Note: this check catches strange situations. In all cases, LLVM should
+ // only be involved in the creation and deletion of regular files. This
+ // check ensures that what we're trying to erase is a regular file. It
+ // effectively prevents LLVM from erasing things like /dev/null, any block
+ // special file, or other things that aren't "regular" files.
+ if (S_ISREG(buf.st_mode)) {
+ if (unlink(path.c_str()) != 0)
+ return MakeErrMsg(ErrStr, path + ": can't destroy file");
+ return false;
+ }
+
+ if (!S_ISDIR(buf.st_mode)) {
+ if (ErrStr) *ErrStr = "not a file or directory";
+ return true;
+ }
+
+ if (remove_contents) {
+ // Recursively descend the directory to remove its contents.
+ std::string cmd = "/bin/rm -rf " + path;
+ if (system(cmd.c_str()) != 0) {
+ MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
+ return true;
+ }
+ return false;
+ }
+
+ // Otherwise, try to just remove the one directory.
+ std::string pathname(path);
+ size_t lastchar = path.length() - 1;
+ if (pathname[lastchar] == '/')
+ pathname[lastchar] = '\0';
+ else
+ pathname[lastchar+1] = '\0';
+
+ if (rmdir(pathname.c_str()) != 0)
+ return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
+ return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (0 != ::rename(path.c_str(), newName.c_str()))
+ return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+ newName.str() + "'");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+ struct utimbuf utb;
+ utb.actime = si.modTime.toPosixTime();
+ utb.modtime = utb.actime;
+ if (0 != ::utime(path.c_str(),&utb))
+ return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+ if (0 != ::chmod(path.c_str(),si.mode))
+ return MakeErrMsg(ErrStr, path + ": can't set mode");
+ return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+ int inFile = -1;
+ int outFile = -1;
+ inFile = ::open(Src.c_str(), O_RDONLY);
+ if (inFile == -1)
+ return MakeErrMsg(ErrMsg, Src.str() +
+ ": can't open source file to copy");
+
+ outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+ if (outFile == -1) {
+ ::close(inFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't create destination file for copy");
+ }
+
+ char Buffer[16*1024];
+ while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+ if (Amt == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
+ }
+ } else {
+ char *BufPtr = Buffer;
+ while (Amt) {
+ ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+ if (AmtWritten == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Dest.str() +
+ ": can't write destination file");
+ }
+ } else {
+ Amt -= AmtWritten;
+ BufPtr += AmtWritten;
+ }
+ }
+ }
+ }
+ ::close(inFile);
+ ::close(outFile);
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+ // mktemp or our own implementation.
+ // This uses std::vector instead of SmallVector to avoid a dependence on
+ // libSupport. And performance isn't critical here.
+ std::vector<char> Buf;
+ Buf.resize(path.size()+8);
+ char *FNBuffer = &Buf[0];
+ path.copy(FNBuffer,path.size());
+ bool isdir;
+ if (!fs::is_directory(path, isdir) && isdir)
+ strcpy(FNBuffer+path.size(), "/XXXXXX");
+ else
+ strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+ int TempFD;
+ if ((TempFD = mkstemp(FNBuffer)) == -1)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // We don't need to hold the temp file descriptor... we will trust that no one
+ // will overwrite/delete the file before we can open it again.
+ close(TempFD);
+
+ // Save the name
+ path = FNBuffer;
+
+ // By default mkstemp sets the mode to 0600, so update mode bits now.
+ AddPermissionBits (*this, 0666);
+#elif defined(HAVE_MKTEMP)
+ // If we don't have mkstemp, use the old and obsolete mktemp function.
+ if (mktemp(FNBuffer) == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // Save the name
+ path = FNBuffer;
+#else
+ // Okay, looks like we have to do it all by our lonesome.
+ static unsigned FCounter = 0;
+ // Try to initialize with unique value.
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+ char* pos = strstr(FNBuffer, "XXXXXX");
+ do {
+ if (++FCounter > 0xFFFFFF) {
+ return MakeErrMsg(ErrMsg,
+ path + ": can't make unique filename: too many files");
+ }
+ sprintf(pos, "%06X", FCounter);
+ path = FNBuffer;
+ } while (exists());
+ // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+ // LLVM.
+#endif
+ return false;
+}
+
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+ Flags |= MAP_FILE;
+#endif
+ void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset);
+ if (BasePtr == MAP_FAILED)
+ return 0;
+ return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) {
+ ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
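Taken together, the Path primitives above follow one convention: methods that touch the disk return true on failure and fill the optional ErrMsg string. A minimal caller might look like the sketch below (the "llvm/Support/Path.h" include and the /tmp base name are assumptions, not part of this patch):

    #include "llvm/Support/Path.h"
    #include <iostream>
    #include <string>

    int main() {
      std::string Err;
      llvm::sys::Path Tmp;
      Tmp.set("/tmp/llvm-demo");                        // base name; makeUnique appends -XXXXXX
      if (Tmp.createTemporaryFileOnDisk(false, &Err)) { // true means failure
        std::cerr << Err << '\n';
        return 1;
      }
      std::cout << "created " << Tmp.str() << '\n';
      Tmp.eraseFromDisk(false, &Err);                   // best-effort cleanup
      return 0;
    }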
diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc
new file mode 100644
index 000000000000..03ff28367e44
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,507 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+using namespace llvm;
+
+namespace {
+ /// This class automatically closes the given file descriptor when it goes out
+ /// of scope. You can take back explicit ownership of the file descriptor by
+ /// calling take(). The destructor does not verify that close was successful.
+ /// Therefore, never allow this class to call close on a file descriptor that
+ /// has been read from or written to.
+ struct AutoFD {
+ int FileDescriptor;
+
+ AutoFD(int fd) : FileDescriptor(fd) {}
+ ~AutoFD() {
+ if (FileDescriptor >= 0)
+ ::close(FileDescriptor);
+ }
+
+ int take() {
+ int ret = FileDescriptor;
+ FileDescriptor = -1;
+ return ret;
+ }
+
+ operator int() const {return FileDescriptor;}
+ };
+
+ error_code TempDir(SmallVectorImpl<char> &result) {
+ // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+ const char *dir = 0;
+ (dir = std::getenv("TMPDIR" )) ||
+ (dir = std::getenv("TMP" )) ||
+ (dir = std::getenv("TEMP" )) ||
+ (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+ (dir = P_tmpdir) ||
+#endif
+ (dir = "/tmp");
+
+ result.clear();
+ StringRef d(dir);
+ result.append(d.begin(), d.end());
+ return success;
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ result.reserve(MAXPATHLEN);
+
+ while (true) {
+ if (::getcwd(result.data(), result.capacity()) == 0) {
+ // See if there was a real error.
+ if (errno != errc::not_enough_memory)
+ return error_code(errno, system_category());
+ // Otherwise there just wasn't enough space.
+ result.reserve(result.capacity() * 2);
+ } else
+ break;
+ }
+
+ result.set_size(strlen(result.data()));
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ const size_t buf_sz = 32768;
+ char buffer[buf_sz];
+ int from_file = -1, to_file = -1;
+
+ // Open from.
+ if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+ return error_code(errno, system_category());
+ AutoFD from_fd(from_file);
+
+ // Stat from.
+ struct stat from_stat;
+ if (::stat(f.begin(), &from_stat) != 0)
+ return error_code(errno, system_category());
+
+ // Setup to flags.
+ int to_flags = O_CREAT | O_WRONLY;
+ if (copt == copy_option::fail_if_exists)
+ to_flags |= O_EXCL;
+
+ // Open to.
+ if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+ return error_code(errno, system_category());
+ AutoFD to_fd(to_file);
+
+ // Copy!
+ ssize_t sz, sz_read = 1, sz_write;
+ while (sz_read > 0 &&
+ (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+ // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+ // Marc Rochkind, Addison-Wesley, 2004, page 94
+ sz_write = 0;
+ do {
+ if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+ sz_read = sz; // cause read loop termination.
+ break; // error.
+ }
+ sz_write += sz;
+ } while (sz_write < sz_read);
+ }
+
+ // After all the file operations above the return value of close actually
+ // matters.
+ if (::close(from_fd.take()) < 0) sz_read = -1;
+ if (::close(to_fd.take()) < 0) sz_read = -1;
+
+ // Check for errors.
+ if (sz_read < 0)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+ if (errno != errc::file_exists)
+ return error_code(errno, system_category());
+ existed = true;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::link(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::symlink(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::remove(p.begin()) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ existed = false;
+ } else
+ existed = true;
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::rename(f.begin(), t.begin()) == -1) {
+    // If it's a cross-device link, copy then delete, otherwise return the error.
+ if (errno == EXDEV) {
+ if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists))
+ return ec;
+ bool Existed;
+ if (error_code ec = remove(from, Existed))
+ return ec;
+ } else
+ return error_code(errno, system_category());
+ }
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::truncate(p.begin(), size) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ result = false;
+ } else
+ result = true;
+
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toNullTerminatedStringRef(a_storage);
+ StringRef b = B.toNullTerminatedStringRef(b_storage);
+
+ struct stat stat_a, stat_b;
+ int error_b = ::stat(b.begin(), &stat_b);
+ int error_a = ::stat(a.begin(), &stat_a);
+
+ // If both are invalid, it's an error. If only one is, the result is false.
+ if (error_a != 0 || error_b != 0) {
+ if (error_a == error_b)
+ return error_code(errno, system_category());
+ result = false;
+ } else {
+ result =
+ stat_a.st_dev == stat_b.st_dev &&
+ stat_a.st_ino == stat_b.st_ino;
+ }
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1)
+ return error_code(errno, system_category());
+ if (!S_ISREG(status.st_mode))
+ return make_error_code(errc::operation_not_permitted);
+
+ result = status.st_size;
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) != 0) {
+ error_code ec(errno, system_category());
+ if (ec == errc::no_such_file_or_directory)
+ result = file_status(file_type::file_not_found);
+ else
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ if (S_ISDIR(status.st_mode))
+ result = file_status(file_type::directory_file);
+ else if (S_ISREG(status.st_mode))
+ result = file_status(file_type::regular_file);
+ else if (S_ISBLK(status.st_mode))
+ result = file_status(file_type::block_file);
+ else if (S_ISCHR(status.st_mode))
+ result = file_status(file_type::character_file);
+ else if (S_ISFIFO(status.st_mode))
+ result = file_status(file_type::fifo_file);
+ else if (S_ISSOCK(status.st_mode))
+ result = file_status(file_type::socket_file);
+ else
+ result = file_status(file_type::type_unknown);
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ SmallString<128> Model;
+ model.toVector(Model);
+ // Null terminate.
+ Model.c_str();
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(Twine(Model));
+ if (!absolute) {
+ SmallString<128> TDir;
+ if (error_code ec = TempDir(TDir)) return ec;
+ path::append(TDir, Twine(Model));
+ Model.swap(TDir);
+ }
+
+  // Replace '%' with random chars. From here on, DO NOT modify Model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallString<128> RandomPath;
+ RandomPath.reserve(Model.size() + 1);
+ ::srand(::time(NULL));
+
+retry_random_path:
+ // This is opened here instead of above to make it easier to track when to
+ // close it. Collisions should be rare enough for the possible extra syscalls
+ // not to matter.
+ FILE *RandomSource = ::fopen("/dev/urandom", "r");
+ RandomPath.set_size(0);
+ for (SmallVectorImpl<char>::const_iterator i = Model.begin(),
+ e = Model.end(); i != e; ++i) {
+ if (*i == '%') {
+ char val = 0;
+ if (RandomSource)
+ val = fgetc(RandomSource);
+ else
+ val = ::rand();
+ RandomPath.push_back("0123456789abcdef"[val & 15]);
+ } else
+ RandomPath.push_back(*i);
+ }
+
+ if (RandomSource)
+ ::fclose(RandomSource);
+
+ // Try to open + create the file.
+rety_open_create:
+ int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
+ if (RandomFD == -1) {
+ // If the file existed, try again, otherwise, error.
+ if (errno == errc::file_exists)
+ goto retry_random_path;
+ // The path prefix doesn't exist.
+ if (errno == errc::no_such_file_or_directory) {
+ StringRef p(RandomPath.begin(), RandomPath.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // Don't try to create network paths.
+ if (i->size() > 2 && (*i)[0] == '/' &&
+ (*i)[1] == '/' &&
+ (*i)[2] != '/')
+ return make_error_code(errc::no_such_file_or_directory);
+ if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+ return error_code(errno, system_category());
+ }
+ }
+ goto rety_open_create;
+ }
+ return error_code(errno, system_category());
+ }
+
+ // Make the path absolute.
+ char real_path_buff[PATH_MAX + 1];
+ if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+ int error = errno;
+ ::close(RandomFD);
+ ::unlink(RandomPath.c_str());
+ return error_code(error, system_category());
+ }
+
+ result_path.clear();
+ StringRef d(real_path_buff);
+ result_path.append(d.begin(), d.end());
+
+ result_fd = RandomFD;
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallString<128> path_null(path);
+ DIR *directory = ::opendir(path_null.c_str());
+ if (directory == 0)
+ return error_code(errno, system_category());
+
+ it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+ // Add something for replace_filename to replace.
+ path::append(path_null, ".");
+ it.CurrentEntry = directory_entry(path_null.str());
+ return directory_iterator_increment(it);
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle)
+ ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ errno = 0;
+ dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+ if (cur_dir == 0 && errno != 0) {
+ return error_code(errno, system_category());
+ } else if (cur_dir != 0) {
+ StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+ if ((name.size() == 1 && name[0] == '.') ||
+ (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+ return directory_iterator_increment(it);
+ it.CurrentEntry.replace_filename(name);
+ } else
+ return directory_iterator_destruct(it);
+
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> PathStorage;
+ StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+ result.set_size(0);
+
+ // Open path.
+ std::FILE *file = std::fopen(Path.data(), "rb");
+ if (file == 0)
+ return error_code(errno, system_category());
+
+ // Reserve storage.
+ result.reserve(len);
+
+ // Read magic!
+ size_t size = std::fread(result.data(), 1, len, file);
+ if (std::ferror(file) != 0) {
+ std::fclose(file);
+ return error_code(errno, system_category());
+ } else if (size != result.size()) {
+ if (std::feof(file) != 0) {
+ std::fclose(file);
+ result.set_size(size);
+ return make_error_code(errc::value_too_large);
+ }
+ }
+ std::fclose(file);
+ result.set_size(len);
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
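The PathV2 routines above all return an error_code, and a default-constructed (success) code tests false, so callers can use the same if-initializer pattern the file itself uses. A sketch of a caller follows; the FileSystem.h/SmallString.h include paths are assumed and the file names are placeholders:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/ADT/SmallString.h"
    #include <cstdio>

    using namespace llvm;

    int main() {
      // Copy a file, refusing to clobber an existing destination.
      if (error_code ec = sys::fs::copy_file("in.txt", "out.txt",
                                             sys::fs::copy_option::fail_if_exists)) {
        std::fprintf(stderr, "copy failed: %s\n", ec.message().c_str());
        return 1;
      }

      // Create a unique temporary file; '%' characters in the model are replaced
      // with random hex digits, a relative model is placed under TMPDIR (or /tmp),
      // and missing parent directories are created on demand.
      int FD;
      SmallString<128> TempPath;
      if (sys::fs::unique_file("demo-%%%%%%.tmp", FD, TempPath))
        return 1;
      std::fprintf(stderr, "temp file: %s (fd %d)\n", TempPath.c_str(), FD);
      return 0;
    }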
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
new file mode 100644
index 000000000000..5cdb11ccebc4
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -0,0 +1,295 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
+// Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+# include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+ // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+ // memory protection and mmap() is 4k.
+ // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+ const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+ const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+ long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+ return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi;
+ mi = ::mallinfo();
+ return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_in_use; // darwin
+#elif defined(HAVE_SBRK)
+ // Note this is only an approximation and more closely resembles
+ // the value returned by mallinfo in the arena field.
+ static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+ char *EndOfMemory = (char*)sbrk(0);
+ if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+ return EndOfMemory - StartOfMemory;
+ else
+ return 0;
+#else
+#warning Cannot get malloc info on this platform
+ return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi = ::mallinfo();
+ return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_allocated; // darwin
+#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+ return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+ TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ user_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+ sys_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+ user_time.seconds(0);
+ user_time.microseconds(0);
+ sys_time.seconds(0);
+ sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+ return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+ return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = 0;
+ setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+ // Disable crash reporting on Mac OS X 10.0-10.4
+
+ // get information about the original set of exception ports for the task
+ mach_msg_type_number_t Count = 0;
+ exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+ exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+ exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+ kern_return_t err =
+ task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+ &Count, OriginalPorts, OriginalBehaviors,
+ OriginalFlavors);
+ if (err == KERN_SUCCESS) {
+ // replace each with MACH_PORT_NULL.
+ for (unsigned i = 0; i != Count; ++i)
+ task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+ MACH_PORT_NULL, OriginalBehaviors[i],
+ OriginalFlavors[i]);
+ }
+
+ // Disable crash reporting on Mac OS X 10.5
+ signal(SIGABRT, _exit);
+ signal(SIGILL, _exit);
+ signal(SIGFPE, _exit);
+ signal(SIGSEGV, _exit);
+ signal(SIGBUS, _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(STDIN_FILENO);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDOUT_FILENO);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+#if HAVE_ISATTY
+ return isatty(fd);
+#else
+ // If we don't have isatty, just return false.
+ return false;
+#endif
+}
+
+static unsigned getColumns(int FileID) {
+ // If COLUMNS is defined in the environment, wrap to that many columns.
+ if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+ int Columns = std::atoi(ColumnsStr);
+ if (Columns > 0)
+ return Columns;
+ }
+
+ unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+ // Try to determine the width of the terminal.
+ struct winsize ws;
+ if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+ Columns = ws.ws_col;
+#endif
+
+ return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+ if (!StandardOutIsDisplayed())
+ return 0;
+
+ return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+ if (!StandardErrIsDisplayed())
+ return 0;
+
+ return getColumns(2);
+}
+
+static bool terminalHasColors() {
+ if (const char *term = std::getenv("TERM")) {
+ // Most modern terminals support ANSI escape sequences for colors.
+ // We could check terminfo, or have a list of known terms that support
+ // colors, but that would be overkill.
+ // The user can always ask for no colors by setting TERM to dumb, or
+ // using a commandline flag.
+ return strcmp(term, "dumb") != 0;
+ }
+ return false;
+}
+
+bool Process::StandardOutHasColors() {
+ if (!StandardOutIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+ if (!StandardErrIsDisplayed())
+ return false;
+ return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+ // No, we use ANSI escape sequences.
+ return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+ COLOR(FGBG, "0", BOLD),\
+ COLOR(FGBG, "1", BOLD),\
+ COLOR(FGBG, "2", BOLD),\
+ COLOR(FGBG, "3", BOLD),\
+ COLOR(FGBG, "4", BOLD),\
+ COLOR(FGBG, "5", BOLD),\
+ COLOR(FGBG, "6", BOLD),\
+ COLOR(FGBG, "7", BOLD)\
+ }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+ return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+ return "\033[0m";
+}
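A small sketch of the terminal queries defined above (the Process.h include path is an assumption); color code 1 selects ANSI red per the colorcodes table:

    #include "llvm/Support/Process.h"
    #include <cstdio>

    int main() {
      using llvm::sys::Process;
      unsigned Cols = Process::StandardOutColumns();   // 0 when stdout is not a terminal
      if (Process::StandardOutHasColors())
        std::printf("%swarning:%s terminal is %u columns wide\n",
                    Process::OutputColor(1, /*bold=*/true, /*bg=*/false),
                    Process::ResetColor(), Cols);
      else
        std::printf("warning: terminal is %u columns wide\n", Cols);
      return 0;
    }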
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
new file mode 100644
index 000000000000..346baf1744dc
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -0,0 +1,430 @@
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "llvm/Support/FileSystem.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+ extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+unsigned Program::GetPid() const {
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ return static_cast<unsigned>(pid);
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Use the given path verbatim if it contains any slashes; this matches
+ // the behavior of sh(1) and friends.
+ if (progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes. Search
+ // for it through the directories specified in the PATH environment variable.
+
+  // Get the path. If it's empty, we can't do anything to find it.
+ const char *PathStr = getenv("PATH");
+ if (PathStr == 0)
+ return Path();
+
+ // Now we have a colon separated list of directories to search; try them.
+ size_t PathLen = strlen(PathStr);
+ while (PathLen) {
+ // Find the first colon...
+ const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+ // Check to see if this first directory contains the executable...
+ Path FilePath;
+ if (FilePath.set(std::string(PathStr,Colon))) {
+ FilePath.appendComponent(progName);
+ if (FilePath.canExecute())
+ return FilePath; // Found the executable!
+ }
+
+ // Nope it wasn't in this directory, check the next path in the list!
+ PathLen -= Colon-PathStr;
+ PathStr = Colon;
+
+ // Advance past duplicate colons
+ while (*PathStr == ':') {
+ PathStr++;
+ PathLen--;
+ }
+ }
+ return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ // Open the file
+ int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+ if (InFD == -1) {
+ MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+ + (FD == 0 ? "input" : "output"));
+ return true;
+ }
+
+ // Install it as the requested FD
+ if (dup2(InFD, FD) == -1) {
+ MakeErrMsg(ErrMsg, "Cannot dup2");
+ close(InFD);
+ return true;
+ }
+ close(InFD); // Close the original FD
+ return false;
+}
+
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+ posix_spawn_file_actions_t *FileActions) {
+ if (Path == 0) // Noop
+ return false;
+ const char *File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->c_str();
+
+ if (int Err = posix_spawn_file_actions_addopen(FileActions, FD,
+ File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
+ return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+ return false;
+}
+#endif
+
+static void TimeOutHandler(int Sig) {
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
+ struct rlimit r;
+ __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+ // Heap size
+ getrlimit (RLIMIT_DATA, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+ // Resident set size.
+ getrlimit (RLIMIT_RSS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
+ // Virtual memory.
+ getrlimit (RLIMIT_AS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+}
+
+bool
+Program::Execute(const Path &path, const char **args, const char **envp,
+ const Path **redirects, unsigned memoryLimit,
+ std::string *ErrMsg) {
+  // If this OS has posix_spawn and there is no memory limit being applied, use
+ // posix_spawn. It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+ if (memoryLimit == 0) {
+ posix_spawn_file_actions_t FileActionsStore;
+ posix_spawn_file_actions_t *FileActions = 0;
+
+ if (redirects) {
+ FileActions = &FileActionsStore;
+ posix_spawn_file_actions_init(FileActions);
+
+ // Redirect stdin/stdout.
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+ RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+ return false;
+ if (redirects[1] == 0 || redirects[2] == 0 ||
+ *redirects[1] != *redirects[2]) {
+ // Just redirect stderr
+ if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+ } else {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2))
+ return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+ }
+ }
+
+ if (!envp)
+#if !defined(__APPLE__)
+ envp = const_cast<const char **>(environ);
+#else
+ // environ is missing in dylibs.
+ envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+ // Explicitly initialized to prevent what appears to be a valgrind false
+ // positive.
+ pid_t PID = 0;
+ int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0,
+ const_cast<char **>(args), const_cast<char **>(envp));
+
+ if (FileActions)
+ posix_spawn_file_actions_destroy(FileActions);
+
+ if (Err)
+ return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+ Data_ = reinterpret_cast<void*>(PID);
+ return true;
+ }
+#endif
+
+ // Create a child process.
+ int child = fork();
+ switch (child) {
+ // An error occurred: Return to the caller.
+ case -1:
+ MakeErrMsg(ErrMsg, "Couldn't fork");
+ return false;
+
+ // Child process: Execute the program.
+ case 0: {
+ // Redirect file descriptors...
+ if (redirects) {
+ // Redirect stdin
+ if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
+ // Redirect stdout
+ if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+ if (redirects[1] && redirects[2] &&
+ *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (-1 == dup2(1,2)) {
+ MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+ return false;
+ }
+ } else {
+ // Just redirect stderr
+ if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
+ }
+ }
+
+ // Set memory limits
+ if (memoryLimit!=0) {
+ SetMemoryLimits(memoryLimit);
+ }
+
+ // Execute!
+ if (envp != 0)
+ execve(path.c_str(),
+ const_cast<char **>(args),
+ const_cast<char **>(envp));
+ else
+ execv(path.c_str(),
+ const_cast<char **>(args));
+ // If the execve() failed, we should exit. Follow Unix protocol and
+ // return 127 if the executable was not found, and 126 otherwise.
+ // Use _exit rather than exit so that atexit functions and static
+ // object destructors cloned from the parent process aren't
+ // redundantly run, and so that any data buffered in stdio buffers
+ // cloned from the parent aren't redundantly written out.
+ _exit(errno == ENOENT ? 127 : 126);
+ }
+
+ // Parent process: Break out of the switch to do our processing.
+ default:
+ break;
+ }
+
+ Data_ = reinterpret_cast<void*>(child);
+
+ return true;
+}
+
+int
+Program::Wait(const sys::Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
+ struct sigaction Act, Old;
+
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ // Install a timeout handler. The handler itself does nothing, but the simple
+ // fact of having a handler at all causes the wait below to return with EINTR,
+ // unlike if we used SIG_IGN.
+ if (secondsToWait) {
+ memset(&Act, 0, sizeof(Act));
+ Act.sa_handler = TimeOutHandler;
+ sigemptyset(&Act.sa_mask);
+ sigaction(SIGALRM, &Act, &Old);
+ alarm(secondsToWait);
+ }
+
+ // Parent process: Wait for the child process to terminate.
+ int status;
+ uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+ pid_t child = static_cast<pid_t>(pid);
+ while (waitpid(pid, &status, 0) != child)
+ if (secondsToWait && errno == EINTR) {
+ // Kill the child.
+ kill(child, SIGKILL);
+
+ // Turn off the alarm and restore the signal handler
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+
+ // Wait for child to die
+ if (wait(&status) != child)
+ MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+ else
+ MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+ return -2; // Timeout detected
+ } else if (errno != EINTR) {
+ MakeErrMsg(ErrMsg, "Error waiting for child process");
+ return -1;
+ }
+
+ // We exited normally without timeout, so turn off the timer.
+ if (secondsToWait) {
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+ }
+
+ // Return the proper exit status. Detect error conditions
+ // so we can return -1 for them and set ErrMsg informatively.
+ int result = 0;
+ if (WIFEXITED(status)) {
+ result = WEXITSTATUS(status);
+#ifdef HAVE_POSIX_SPAWN
+ // The posix_spawn child process returns 127 on any kind of error.
+ // Following the POSIX convention for command-line tools (which posix_spawn
+ // itself apparently does not), check to see if the failure was due to some
+ // reason other than the file not existing, and return 126 in this case.
+ bool Exists;
+ if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+ result = 126;
+#endif
+ if (result == 127) {
+ if (ErrMsg)
+ *ErrMsg = llvm::sys::StrError(ENOENT);
+ return -1;
+ }
+ if (result == 126) {
+ if (ErrMsg)
+ *ErrMsg = "Program could not be executed";
+ return -1;
+ }
+ } else if (WIFSIGNALED(status)) {
+ if (ErrMsg) {
+ *ErrMsg = strsignal(WTERMSIG(status));
+#ifdef WCOREDUMP
+ if (WCOREDUMP(status))
+ *ErrMsg += " (core dumped)";
+#endif
+ }
+ // Return a special value to indicate that the process received an unhandled
+ // signal during execution as opposed to failing to execute.
+ return -2;
+ }
+ return result;
+#else
+ if (ErrMsg)
+ *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+ return -1;
+#endif
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
+ pid_t pid = static_cast<pid_t>(pid64);
+
+ if (kill(pid, SIGKILL) != 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+bool Program::ChangeStderrToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+}
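A sketch of driving the Program class above; note that Execute() returns true on success, the opposite of the Path convention, while Wait() returns the child's exit status. The Program.h/Path.h include paths and the choice of echo are assumptions:

    #include "llvm/Support/Program.h"
    #include "llvm/Support/Path.h"
    #include <cstdio>
    #include <string>

    int main() {
      using namespace llvm;
      sys::Path Echo = sys::Program::FindProgramByName("echo");
      if (Echo.isEmpty())                            // not found on PATH
        return 1;
      const char *Args[] = { "echo", "hello", 0 };   // argv-style, null-terminated
      std::string Err;
      sys::Program P;
      if (!P.Execute(Echo, Args, /*envp=*/0, /*redirects=*/0,
                     /*memoryLimit=*/0, &Err)) {
        std::fprintf(stderr, "%s\n", Err.c_str());
        return 1;
      }
      return P.Wait(Echo, /*secondsToWait=*/10, &Err);   // exit status, or < 0 on error
    }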
diff --git a/contrib/llvm/lib/Support/Unix/README.txt b/contrib/llvm/lib/Support/Unix/README.txt
new file mode 100644
index 000000000000..3d547c2990d5
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+ Posix - code that is specific to Posix variants of UNIX
+ SUS - code that is specific to the Single Unix Specification
+ SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/contrib/llvm/lib/Support/Unix/RWMutex.inc b/contrib/llvm/lib/Support/Unix/RWMutex.inc
new file mode 100644
index 000000000000..40e87ff13111
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ return true;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
new file mode 100644
index 000000000000..e286869e775d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -0,0 +1,303 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h> // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig); // defined below.
+
+static SmartMutex<true> SignalsMutex;
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+ SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+ IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+#ifdef SIGSYS
+ , SIGSYS
+#endif
+#ifdef SIGXCPU
+ , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+ , SIGXFSZ
+#endif
+#ifdef SIGEMT
+ , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+ KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+ struct sigaction SA;
+ int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+ assert(NumRegisteredSignals <
+ sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+ "Out of space for signal handlers!");
+
+ struct sigaction NewHandler;
+
+ NewHandler.sa_handler = SignalHandler;
+ NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+ sigemptyset(&NewHandler.sa_mask);
+
+ // Install the new handler, save the old one in RegisteredSignalInfo.
+ sigaction(Signal, &NewHandler,
+ &RegisteredSignalInfo[NumRegisteredSignals].SA);
+ RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+ ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+ // If the handlers are already registered, we're done.
+ if (NumRegisteredSignals != 0) return;
+
+ std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+ std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+ // Restore all of the signal handlers to how they were before we showed up.
+ for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+ sigaction(RegisteredSignalInfo[i].SigNo,
+ &RegisteredSignalInfo[i].SA, 0);
+ NumRegisteredSignals = 0;
+}
+
+
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+static void RemoveFilesToRemove() {
+ while (!FilesToRemove.empty()) {
+ FilesToRemove.back().eraseFromDisk(true);
+ FilesToRemove.pop_back();
+ }
+}
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+ // Restore the signal behavior to default, so that the program actually
+ // crashes when we return and the signal reissues. This also ensures that if
+ // we crash in our signal handler that the program will terminate immediately
+ // instead of recursing in the signal handler.
+ UnregisterHandlers();
+
+ // Unmask all potentially blocked kill signals.
+ sigset_t SigMask;
+ sigfillset(&SigMask);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+
+ if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+ if (InterruptFunction) {
+ void (*IF)() = InterruptFunction;
+ SignalsMutex.release();
+ InterruptFunction = 0;
+ IF(); // run the interrupt function.
+ return;
+ }
+
+ SignalsMutex.release();
+ raise(Sig); // Execute the default handler.
+ return;
+ }
+
+ SignalsMutex.release();
+
+ // Otherwise if it is a fault (like SEGV) run any handler.
+ for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+ CallBacksToRun[i].first(CallBacksToRun[i].second);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+ SignalsMutex.release();
+}
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+ SignalsMutex.acquire();
+ InterruptFunction = IF;
+ SignalsMutex.release();
+ RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+ std::string* ErrMsg) {
+ SignalsMutex.acquire();
+ FilesToRemove.push_back(Filename);
+
+ SignalsMutex.release();
+
+ RegisterHandlers();
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ SignalsMutex.acquire();
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+ if (I != FilesToRemove.rend())
+ FilesToRemove.erase(I.base()-1);
+ SignalsMutex.release();
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+ static void* StackTrace[256];
+ // Use backtrace() to output a backtrace on Linux systems with glibc.
+ int depth = backtrace(StackTrace,
+ static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+ int width = 0;
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+
+ int nwidth;
+ if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+ else nwidth = strlen(name) - 1;
+
+ if (nwidth > width) width = nwidth;
+ }
+
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+
+ fprintf(stderr, "%-2d", i);
+
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+ if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(stderr, " %-*s", width, name+1);
+
+ fprintf(stderr, " %#0*lx",
+ (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+ if (dlinfo.dli_sname != NULL) {
+ int res;
+ fputc(' ', stderr);
+ char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+ if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+ else fputs(d, stderr);
+ free(d);
+
+ fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+ }
+ fputc('\n', stderr);
+ }
+#else
+ backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+ AddSignalHandler(PrintStackTrace, 0);
+}
+
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+#include <signal.h>
+#include <pthread.h>
+
+int raise(int sig) {
+ return pthread_kill(pthread_self(), sig);
+}
+
+void __assert_rtn(const char *func,
+ const char *file,
+ int line,
+ const char *expr) {
+ if (func)
+ fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+ expr, func, file, line);
+ else
+ fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+ expr, file, line);
+ abort();
+}
+
+void abort() {
+ raise(SIGABRT);
+ usleep(1000);
+ __builtin_trap();
+}
+
+#endif
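A sketch of wiring up the signal helpers above in a tool's main(); per the handler logic, files registered with RemoveFileOnSignal are deleted before the interrupt function runs. The Signals.h/Path.h include paths and the output file name are assumptions:

    #include "llvm/Support/Signals.h"
    #include "llvm/Support/Path.h"

    static void onInterrupt() {
      // Runs on SIGINT/SIGTERM etc.; registered files are already removed by now.
    }

    int main() {
      llvm::sys::PrintStackTraceOnErrorSignal();     // backtrace on SIGSEGV, SIGABRT, ...
      llvm::sys::SetInterruptFunction(onInterrupt);

      llvm::sys::Path Output;
      Output.set("/tmp/demo.o");                     // placeholder path
      llvm::sys::RemoveFileOnSignal(Output, 0);      // delete it if we get killed

      // ... do work that writes Output ...

      llvm::sys::DontRemoveFileOnSignal(Output);     // keep it once finished
      return 0;
    }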
diff --git a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
new file mode 100644
index 000000000000..2b4c9017cd91
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -0,0 +1,26 @@
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
+}
diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
new file mode 100644
index 000000000000..5cf5a9d44ed6
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+ using namespace sys;
+
+std::string TimeValue::str() const {
+ char buffer[32];
+
+ time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+ asctime_r(localtime(&ourTime), buffer);
+#else
+ ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+ std::string result(buffer);
+ return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+ struct timeval the_time;
+ timerclear(&the_time);
+ if (0 != ::gettimeofday(&the_time,0)) {
+ // This is *really* unlikely to occur because the only gettimeofday
+ // errors concern the timezone parameter which we're passing in as 0.
+ // In the unlikely case it does happen, just return MinTime, no error
+ // message needed.
+ return MinTime;
+ }
+
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
+ static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+ NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
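For completeness, a tiny sketch of the TimeValue::now()/str() pair defined above (the TimeValue.h include path is an assumption):

    #include "llvm/Support/TimeValue.h"
    #include <cstdio>

    int main() {
      llvm::sys::TimeValue T = llvm::sys::TimeValue::now();
      std::printf("%s\n", T.str().c_str());   // asctime-style text, truncated to 24 chars
      return 0;
    }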
diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
new file mode 100644
index 000000000000..b7be3111d431
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -0,0 +1,87 @@
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/Errno.h"
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <string>
+#include <algorithm>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If \p errnum is -1
+/// (the default), the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+ std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+ if (!ErrMsg)
+ return true;
+ if (errnum == -1)
+ errnum = errno;
+ *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
+ return true;
+}
+
+#endif
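A minimal sketch of the intended calling pattern for MakeErrMsg (hypothetical caller; assumes this header is already included, plus <fcntl.h> for open):

    #include <fcntl.h>

    // Report a failed open(2); leaving errnum at its default of -1 makes
    // MakeErrMsg pick up errno as set by the syscall.
    static bool openForRead(const std::string &Path, int &FD,
                            std::string *ErrMsg) {
      FD = ::open(Path.c_str(), O_RDONLY);
      if (FD < 0)
        return MakeErrMsg(ErrMsg, Path + ": can't open file");
      return false;
    }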
diff --git a/contrib/llvm/lib/Support/Unix/system_error.inc b/contrib/llvm/lib/Support/Unix/system_error.inc
new file mode 100644
index 000000000000..681e919edb4e
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return error_condition(ev, system_category());
+#endif // ELAST
+ return error_condition(ev, generic_category());
+}
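The standard <system_error> facility applies the same ELAST-style split between system-specific and portable conditions; a small sketch in terms of the standard library (not LLVM's copy), assuming a POSIX system:

    #include <cassert>
    #include <cerrno>
    #include <system_error>

    // ENOENT sits below ELAST, so it maps onto the generic condition.
    void checkMapping() {
      std::error_code ec(ENOENT, std::system_category());
      assert(ec.default_error_condition() ==
             std::errc::no_such_file_or_directory);
    }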
diff --git a/contrib/llvm/lib/Support/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
new file mode 100644
index 000000000000..703448524ed9
--- /dev/null
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -0,0 +1,54 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+// defined. If we have valgrind.h but valgrind isn't running, its macros are
+// no-ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+ return !RUNNING_ON_VALGRIND;
+}
+
+// This bool is negated from what we'd expect because code may run before it
+// gets initialized. If that happens, it will appear to be 0 (false), and we
+// want that to cause the rest of the code in this file to run the
+// Valgrind-provided macros.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+ if (NotUnderValgrind)
+ return false;
+ return RUNNING_ON_VALGRIND;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+ if (NotUnderValgrind)
+ return;
+
+ VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+}
+
+#else // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+ return false;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}
+
+#endif // !HAVE_VALGRIND_VALGRIND_H
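A minimal sketch of a call site (hypothetical JIT helper, assuming the llvm/Support/Valgrind.h header):

    #include "llvm/Support/Valgrind.h"

    // After rewriting code in place, tell Valgrind to drop any cached
    // translations for that range; this is a no-op outside Valgrind.
    void patchAndFlush(void *CodeStart, size_t CodeSize) {
      // ... rewrite the machine code in [CodeStart, CodeStart + CodeSize) ...
      llvm::sys::ValgrindDiscardTranslations(CodeStart, CodeSize);
    }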
diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
new file mode 100644
index 000000000000..fc5f5809cb40
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -0,0 +1,137 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+ #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+extern "C" {
+
+ static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName,
+ ULONG_PTR ModuleBase,
+ ULONG ModuleSize,
+ PVOID UserContext)
+ {
+ // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
+ // into the process.
+ if (stricmp(ModuleName, "msvci70") != 0 &&
+ stricmp(ModuleName, "msvcirt") != 0 &&
+ stricmp(ModuleName, "msvcp50") != 0 &&
+ stricmp(ModuleName, "msvcp60") != 0 &&
+ stricmp(ModuleName, "msvcp70") != 0 &&
+ stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+ // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+               // Otherwise, the user should know what they are doing :)
+ stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+ stricmp(ModuleName, "msvcrt20") != 0 &&
+ stricmp(ModuleName, "msvcrt40") != 0) {
+ OpenedHandles.push_back((HMODULE)ModuleBase);
+ }
+ return TRUE;
+ }
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+ std::string *ErrMsg) {
+ if (filename) {
+ HMODULE a_handle = LoadLibrary(filename);
+
+ if (a_handle == 0)
+ return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+ OpenedHandles.push_back(a_handle);
+ } else {
+ // When no file is specified, enumerate all DLLs and EXEs in the
+ // process.
+ EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+ }
+
+ // Because we don't remember the handle, we will never free it; hence,
+ // it is loaded permanently.
+ return false;
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we don't
+// have dynamic linking on Windows. Provide a hook.
+#define EXPLICIT_SYMBOL(SYM) \
+ extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // First check symbols added via AddSymbol().
+ if (ExplicitSymbols) {
+ std::map<std::string, void *>::iterator I =
+ ExplicitSymbols->find(symbolName);
+ std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+ if (I != E)
+ return I->second;
+ }
+
+ // Now search the libraries.
+ for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+ E = OpenedHandles.end(); I != E; ++I) {
+ FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+ if (ptr) {
+ return (void *)(intptr_t)ptr;
+ }
+ }
+
+ #define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+ #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
+ {
+ #include "explicit_symbols.inc"
+ }
+
+ #undef EXPLICIT_SYMBOL
+ #undef EXPLICIT_SYMBOL2
+
+ return 0;
+}
+
+}
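A minimal usage sketch of the two entry points implemented above (hypothetical caller, assuming the llvm/Support/DynamicLibrary.h header):

    #include "llvm/Support/DynamicLibrary.h"
    #include <cstdio>

    // A null filename enumerates every module already loaded into the
    // process; the symbol is then resolved against all opened handles.
    void dumpMallocAddress() {
      llvm::sys::DynamicLibrary::LoadLibraryPermanently(0);
      if (void *P =
            llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("malloc"))
        std::printf("malloc is at %p\n", P);
    }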
diff --git a/contrib/llvm/lib/Support/Windows/Host.inc b/contrib/llvm/lib/Support/Windows/Host.inc
new file mode 100644
index 000000000000..733830e82f08
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+ // FIXME: Adapt to running version.
+ return LLVM_HOSTTRIPLE;
+}
diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
new file mode 100644
index 000000000000..9f69e7367e6f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -0,0 +1,73 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+ const MemoryBlock *NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ static const size_t pageSize = Process::GetPageSize();
+ size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+  // FIXME: support NearBlock if ever needed on Win64.
+
+ void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+ PAGE_EXECUTE_READWRITE);
+ if (pa == NULL) {
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+ return MemoryBlock();
+ }
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+ return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
+ return false;
+}
+
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+ return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+ return false;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+ return true;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+ return false;
+}
+
+}
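A minimal round trip through the allocator implemented above (hypothetical caller, assuming the llvm/Support/Memory.h header):

    #include "llvm/Support/Memory.h"
    #include <string>

    // Allocate one RWX block for JITed code, then release it; Err receives
    // the OS error text on failure.
    bool roundTripRWX() {
      std::string Err;
      llvm::sys::MemoryBlock MB =
          llvm::sys::Memory::AllocateRWX(4096, 0, &Err);
      if (MB.base() == 0)
        return false;
      return !llvm::sys::Memory::ReleaseRWX(MB, &Err);
    }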
diff --git a/contrib/llvm/lib/Support/Windows/Mutex.inc b/contrib/llvm/lib/Support/Windows/Mutex.inc
new file mode 100644
index 000000000000..583dc6359a16
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+MutexImpl::MutexImpl(bool /*recursive*/)
+{
+ data_ = new CRITICAL_SECTION;
+ InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+MutexImpl::~MutexImpl()
+{
+ DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+ delete (LPCRITICAL_SECTION)data_;
+ data_ = 0;
+}
+
+bool
+MutexImpl::acquire()
+{
+ EnterCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::release()
+{
+ LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+ return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
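A minimal sketch of how this implementation is normally reached (hypothetical counter, assuming the sys::Mutex and sys::ScopedLock wrappers from llvm/Support/Mutex.h):

    #include "llvm/Support/Mutex.h"

    static llvm::sys::Mutex CounterLock;
    static int Counter;

    // ScopedLock pairs acquire() with release() over the enclosing scope,
    // which on Windows ends up in the critical-section code above.
    void bump() {
      llvm::sys::ScopedLock Guard(CounterLock);
      ++Counter;
    }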
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
new file mode 100644
index 000000000000..42a92f9c6dfe
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -0,0 +1,931 @@
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes. As LLVM code basically
+// assumes forward slashes are used, backward slashs are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory. Any other use of a trailing slash is stripped. Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simplify the code.
+
+static void FlipBackSlashes(std::string& s) {
+ for (size_t i = 0; i < s.size(); i++)
+ if (s[i] == '\\')
+ s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+
+const char PathSeparator = ';';
+
+StringRef Path::GetEXESuffix() {
+ return "exe";
+}
+
+Path::Path(llvm::StringRef p)
+ : path(p) {
+ FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {
+ FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(StringRef that) {
+ path.assign(that.data(), that.size());
+ FlipBackSlashes(path);
+ return *this;
+}
+
+// push_back 0 on create, and pop_back on delete.
+struct ScopedNullTerminator {
+ std::string &str;
+ ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
+ ~ScopedNullTerminator() {
+ // str.pop_back(); But wait, C++03 doesn't have this...
+ assert(!str.empty() && str[str.size() - 1] == 0
+ && "Null char not present!");
+ str.resize(str.size() - 1);
+ }
+};
+
+bool
+Path::isValid() const {
+ if (path.empty())
+ return false;
+
+ // If there is a colon, it must be the second character, preceded by a letter
+ // and followed by something.
+ size_t len = path.size();
+ // This code assumes that path is null terminated, so make sure it is.
+ ScopedNullTerminator snt(path);
+ size_t pos = path.rfind(':',len);
+ size_t rootslash = 0;
+ if (pos != std::string::npos) {
+ if (pos != 1 || !isalpha(path[0]) || len < 3)
+ return false;
+ rootslash = 2;
+ }
+
+ // Look for a UNC path, and if found adjust our notion of the root slash.
+ if (len > 3 && path[0] == '/' && path[1] == '/') {
+ rootslash = path.find('/', 2);
+ if (rootslash == std::string::npos)
+ rootslash = 0;
+ }
+
+ // Check for illegal characters.
+ if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+ "\013\014\015\016\017\020\021\022\023\024\025\026"
+ "\027\030\031\032\033\034\035\036\037")
+ != std::string::npos)
+ return false;
+
+ // Remove trailing slash, unless it's a root slash.
+ if (len > rootslash+1 && path[len-1] == '/')
+ path.erase(--len);
+
+ // Check each component for legality.
+ for (pos = 0; pos < len; ++pos) {
+ // A component may not end in a space.
+ if (path[pos] == ' ') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0')
+ return false;
+ }
+
+ // A component may not end in a period.
+ if (path[pos] == '.') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0') {
+ // Unless it is the pseudo-directory "."...
+ if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+ return true;
+ // or "..".
+ if (pos > 0 && path[pos-1] == '.') {
+ if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void Path::makeAbsolute() {
+  char  FullPath[MAX_PATH + 1] = {0};
+  LPSTR FilePart = NULL;
+
+ DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+ sizeof(FullPath)/sizeof(FullPath[0]),
+ FullPath, &FilePart);
+
+ if (0 == RetLength) {
+ // FIXME: Report the error GetLastError()
+ assert(0 && "Unable to make absolute path!");
+ } else if (RetLength > MAX_PATH) {
+ // FIXME: Report too small buffer (needed RetLength bytes).
+ assert(0 && "Unable to make absolute path!");
+ } else {
+ path = FullPath;
+ }
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (NameLen) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return NameStart[0] == '/';
+ default:
+ return
+ (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+ (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+ }
+}
+
+bool
+Path::isAbsolute() const {
+ // FIXME: This does not handle correctly an absolute path starting from
+ // a drive letter or in UNC format.
+ switch (path.length()) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return path[0] == '/';
+ default:
+ return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+ }
+}
+
+static Path *TempDirectory;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+ if (TempDirectory)
+ return *TempDirectory;
+
+ char pathname[MAX_PATH];
+ if (!GetTempPath(MAX_PATH, pathname)) {
+ if (ErrMsg)
+ *ErrMsg = "Can't determine temporary directory";
+ return Path();
+ }
+
+ Path result;
+ result.set(pathname);
+
+  // Append a subdirectory based on our process id so multiple LLVMs don't
+ // step on each other's toes.
+#ifdef __MINGW32__
+ // Mingw's Win32 header files are broken.
+ sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+ sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+ result.appendComponent(pathname);
+
+ // If there's a directory left over from a previous LLVM execution that
+ // happened to have the same process id, get rid of it.
+ result.eraseFromDisk(true);
+
+ // And finally (re-)create the empty directory.
+ result.createDirectoryOnDisk(false);
+ TempDirectory = new Path(result);
+ return *TempDirectory;
+}
+
+// FIXME: the following set of functions doesn't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+  // This is the only notion that Windows has of a root directory. Nothing
+ // is here except for drives.
+ return Path("file:///");
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+ char buff[MAX_PATH];
+ // Generic form of C:\Windows\System32
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get system directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+
+ // Reset buff.
+ buff[0] = 0;
+ // Generic form of C:\Windows
+ res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get windows directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ Path ret = GetUserHomeDirectory();
+ if (!ret.appendComponent(".llvm"))
+ assert(0 && "Failed to append .llvm");
+ return ret;
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ char buff[MAX_PATH];
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK)
+ assert(0 && "Failed to get user home directory");
+ return Path(buff);
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAX_PATH];
+ ::GetCurrentDirectoryA(MAX_PATH,pathname);
+ return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+ char pathname[MAX_PATH];
+ DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+ return ret != MAX_PATH ? Path(pathname) : Path();
+}
+
+
+// FIXME: the above set of functions doesn't map to Windows very well.
+
+
+StringRef Path::getDirname() const {
+ return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef(path).substr(slash);
+ else
+ return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return StringRef("");
+ else
+ return StringRef(path).substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) &&
+ (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::isSymLink() const {
+ DWORD attributes = GetFileAttributes(path.c_str());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES)
+ // There's no sane way to report this :(.
+ assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+  // This isn't exactly what defines an NTFS symlink, but it is only true for
+ // paths that act like a symlink.
+ return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
+Path::canRead() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isRegularFile() const {
+ bool res;
+ if (fs::is_regular_file(path, res))
+ return false;
+ return res;
+}
+
+StringRef
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash, we have a root directory
+ if (pos == path.length()-1)
+ return path;
+
+ // Return everything after the last slash
+ return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+ ": Can't get status: ");
+ return 0;
+ }
+
+ status.fileSize = fi.nFileSizeHigh;
+ status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+ status.fileSize += fi.nFileSizeLow;
+
+ status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+ status.user = 9999; // Not applicable to Windows, so...
+ status.group = 9999; // Not applicable to Windows, so...
+
+ // FIXME: this is only unique if the file is accessed by the same file path.
+ // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+ // numbers, but the concept doesn't exist in Windows.
+ status.uniqueID = 0;
+ for (unsigned i = 0; i < path.length(); ++i)
+ status.uniqueID += path[i];
+
+ ULARGE_INTEGER ui;
+ ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+ ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+ status.modTime.fromWin32Time(ui.QuadPart);
+
+ status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ // All files are readable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ DWORD attr = GetFileAttributes(path.c_str());
+
+ // If it doesn't exist, we're done.
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ return false;
+
+ if (attr & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+ MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ // All files are executable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrMsg, path + ": can't get status of file");
+ return true;
+ }
+
+ if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ if (ErrMsg)
+ *ErrMsg = path + ": not a directory";
+ return true;
+ }
+
+ result.clear();
+ WIN32_FIND_DATA fd;
+ std::string searchpath = path;
+ if (path.size() == 0 || searchpath[path.size()-1] == '/')
+ searchpath += "*";
+ else
+ searchpath += "/*";
+
+ HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+ if (h == INVALID_HANDLE_VALUE) {
+ if (GetLastError() == ERROR_FILE_NOT_FOUND)
+ return true; // not really an error, now is it?
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+
+ do {
+ if (fd.cFileName[0] == '.')
+ continue;
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ result.insert(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+ return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+ if (a_path.empty())
+ return false;
+ std::string save(path);
+ path = a_path;
+ FlipBackSlashes(path);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+ if (name.empty())
+ return false;
+ std::string save(path);
+ if (!path.empty()) {
+ size_t last = path.size() - 1;
+ if (path[last] != '/')
+ path += '/';
+ }
+ path += name;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+ return false;
+ std::string save(path);
+ path.erase(slashpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ std::string save(path);
+ path.erase(dotpos, path.size()-dotpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char* msg) {
+ if (ErrMsg)
+ *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+ return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+ // Get a writeable copy of the path name
+ size_t len = path.length();
+ char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+ path.copy(pathname, len);
+ pathname[len] = 0;
+
+ // Make sure it ends with a slash.
+ if (len == 0 || pathname[len - 1] != '/') {
+ pathname[len] = '/';
+ pathname[++len] = 0;
+ }
+
+ // Determine starting point for initial / search.
+ char *next = pathname;
+ if (pathname[0] == '/' && pathname[1] == '/') {
+ // Skip host name.
+ next = strchr(pathname+2, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ // Skip share name.
+ next = strchr(next+1, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname,"badly formed remote directory");
+
+ next++;
+ if (*next == 0)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ } else {
+ if (pathname[1] == ':')
+ next += 2; // skip drive letter
+ if (*next == '/')
+ next++; // skip root directory
+ }
+
+ // If we're supposed to create intermediate directories
+ if (create_parents) {
+ // Loop through the directory components until we're done
+ while (*next) {
+ next = strchr(next, '/');
+ *next = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS)
+ return MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": Can't create directory: ");
+ *next++ = '/';
+ }
+ } else {
+ // Drop trailing slash.
+ pathname[len-1] = 0;
+ if (!CreateDirectory(pathname, NULL) &&
+ GetLastError() != ERROR_ALREADY_EXISTS) {
+ return MakeErrMsg(ErrMsg, std::string(pathname) +
+ ": Can't create directory: ");
+ }
+ }
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+ CloseHandle(h);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+ return true;
+
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+ // If it doesn't exist, we're done.
+ bool Exists;
+ if (fs::exists(path, Exists) || !Exists)
+ return false;
+
+ char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+ int lastchar = path.length() - 1 ;
+ path.copy(pathname, lastchar+1);
+
+ // Make path end with '/*'.
+ if (pathname[lastchar] != '/')
+ pathname[++lastchar] = '/';
+ pathname[lastchar+1] = '*';
+ pathname[lastchar+2] = 0;
+
+ if (remove_contents) {
+ WIN32_FIND_DATA fd;
+ HANDLE h = FindFirstFile(pathname, &fd);
+
+ // It's a bad idea to alter the contents of a directory while enumerating
+ // its contents. So build a list of its contents first, then destroy them.
+
+ if (h != INVALID_HANDLE_VALUE) {
+ std::vector<Path> list;
+
+ do {
+ if (strcmp(fd.cFileName, ".") == 0)
+ continue;
+ if (strcmp(fd.cFileName, "..") == 0)
+ continue;
+
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ list.push_back(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+
+ for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+ ++I) {
+ Path &aPath = *I;
+ aPath.eraseFromDisk(true);
+ }
+ } else {
+ if (GetLastError() != ERROR_FILE_NOT_FOUND)
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+ }
+
+ pathname[lastchar] = 0;
+ if (!RemoveDirectory(pathname))
+ return MakeErrMsg(ErrStr,
+ std::string(pathname) + ": Can't destroy directory: ");
+ return false;
+ } else {
+ // Read-only files cannot be deleted on Windows. Must remove the read-only
+ // attribute first.
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ }
+
+ if (!DeleteFile(path.c_str()))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ return false;
+ }
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char* buf = reinterpret_cast<char*>(alloca(len));
+
+ HANDLE h = CreateFile(path.c_str(),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return false;
+
+ DWORD nRead = 0;
+ BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
+ CloseHandle(h);
+
+ if (!ret || nRead != len)
+ return false;
+
+ Magic = std::string(buf, len);
+ return true;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
+ return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
+ + "': ");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
+ // FIXME: should work on directories also.
+ if (!si.isFile) {
+ return true;
+ }
+
+ HANDLE h = CreateFile(path.c_str(),
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return true;
+
+ BY_HANDLE_FILE_INFORMATION bhfi;
+ if (!GetFileInformationByHandle(h, &bhfi)) {
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
+ }
+
+ ULARGE_INTEGER ui;
+ ui.QuadPart = si.modTime.toWin32Time();
+ FILETIME ft;
+ ft.dwLowDateTime = ui.LowPart;
+ ft.dwHighDateTime = ui.HighPart;
+ BOOL ret = SetFileTime(h, NULL, &ft, &ft);
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ if (!ret) {
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
+ }
+
+ // Best we can do with Unix permission bits is to interpret the owner
+ // writable bit.
+ if (si.mode & 0200) {
+ if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ } else {
+ if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ }
+
+ return false;
+}
+
+bool
+CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
+ // Can't use CopyFile macro defined in Windows.h because it would mess up the
+ // above line. We use the expansion it would have in a non-UNICODE build.
+ if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
+ return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
+ "' to '" + Dest.str() + "': ");
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
+ return false; // File doesn't exist already, just use it!
+
+ // Reserve space for -XXXXXX at the end.
+ char *FNBuffer = (char*) alloca(path.size()+8);
+ unsigned offset = path.size();
+ path.copy(FNBuffer, offset);
+
+ // Find a numeric suffix that isn't used by an existing file. Assume there
+ // won't be more than 1 million files with the same prefix. Probably a safe
+ // bet.
+ static int FCounter = -1;
+ if (FCounter < 0) {
+ // Give arbitrary initial seed.
+ // FIXME: We should use sys::fs::unique_file() in future.
+ LARGE_INTEGER cnt64;
+ DWORD x = GetCurrentProcessId();
+ x = (x << 16) | (x >> 16);
+ if (QueryPerformanceCounter(&cnt64)) // RDTSC
+ x ^= cnt64.HighPart ^ cnt64.LowPart;
+ FCounter = x % 1000000;
+ }
+ do {
+ sprintf(FNBuffer+offset, "-%06u", FCounter);
+ if (++FCounter > 999999)
+ FCounter = 0;
+ path = FNBuffer;
+ } while (!fs::exists(path, Exists) && Exists);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ makeUnique(reuse_current, ErrMsg);
+
+ // Now go and create it
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+ CloseHandle(h);
+ return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
+ return 0;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, size_t FileSize) {
+ assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
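A minimal round trip through the accessors defined above (hypothetical values, assuming the llvm/Support/Path.h header):

    #include "llvm/Support/Path.h"
    #include <cassert>

    // The constructor flips backslashes to forward slashes before the
    // accessors ever see the string.
    void pathAccessorDemo() {
      llvm::sys::Path P("C:\\tools\\clang.exe");
      assert(P.getLast() == "clang.exe");
      assert(P.getBasename() == "clang");
      assert(P.getSuffix() == "exe");
    }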
diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc
new file mode 100644
index 000000000000..af71b73cd693
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,757 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <wincrypt.h>
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+ typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+ /*__in*/ LPCWSTR lpSymlinkFileName,
+ /*__in*/ LPCWSTR lpTargetFileName,
+ /*__in*/ DWORD dwFlags);
+
+ PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+ ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+ "CreateSymbolicLinkW"));
+
+ error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+ int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), 0);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf16.reserve(len + 1);
+ utf16.set_size(len);
+
+ len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), utf16.size());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf16 null terminated.
+ utf16.push_back(0);
+ utf16.pop_back();
+
+ return success;
+ }
+
+ error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8) {
+ // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.begin(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf8.reserve(len);
+ utf8.set_size(len);
+
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.data(), utf8.size(),
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf8 null terminated.
+ utf8.push_back(0);
+ utf8.pop_back();
+
+ return success;
+ }
+
+ error_code TempDir(SmallVectorImpl<wchar_t> &result) {
+ retry_temp_dir:
+ DWORD len = ::GetTempPathW(result.capacity(), result.begin());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ if (len > result.capacity()) {
+ result.reserve(len);
+ goto retry_temp_dir;
+ }
+
+ result.set_size(len);
+ return success;
+ }
+
+ // Forwarder for ScopedHandle.
+ BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
+ return ::CryptReleaseContext(Provider, 0);
+ }
+
+ typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
+ BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
+ ScopedCryptContext;
+ bool is_separator(const wchar_t value) {
+ switch (value) {
+ case L'\\':
+ case L'/':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ SmallVector<wchar_t, 128> cur_path;
+ cur_path.reserve(128);
+retry_cur_dir:
+ DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+ // A zero return value indicates a failure other than insufficient space.
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // If there's insufficient space, the len returned is larger than the len
+ // given.
+ if (len > cur_path.capacity()) {
+ cur_path.reserve(len);
+ goto retry_cur_dir;
+ }
+
+ cur_path.set_size(len);
+ // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+ // Find out how much space we need. Sadly, this function doesn't return the
+ // size needed unless you tell it the result size is 0, which means you
+ // _always_ have to call it twice.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return make_error_code(windows_error(::GetLastError()));
+
+ result.reserve(len);
+ result.set_size(len);
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), result.size(),
+ NULL, NULL);
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ // Copy the file.
+ BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(),
+ copt != copy_option::overwrite_if_exists);
+
+ if (res == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::already_exists)
+ existed = true;
+ else
+ return ec;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Only do it if the function is available at runtime.
+ if (!create_symbolic_link_api)
+ return make_error_code(errc::function_not_supported);
+
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (st.type() == file_type::directory_file) {
+ if (!::RemoveDirectoryW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ } else {
+ if (!::DeleteFileW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ }
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::MoveFileExW(wide_from.begin(), wide_to.begin(),
+ MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+ if (fd == -1)
+ return error_code(errno, generic_category());
+#ifdef HAVE__CHSIZE_S
+ errno_t error = ::_chsize_s(fd, size);
+#else
+ errno_t error = ::_chsize(fd, size);
+#endif
+ ::close(fd);
+ return error_code(error, generic_category());
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES) {
+ // See if the file didn't actually exist.
+ error_code ec = make_error_code(windows_error(::GetLastError()));
+ if (ec != windows_error::file_not_found &&
+ ec != windows_error::path_not_found)
+ return ec;
+ result = false;
+ } else
+ result = true;
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toStringRef(a_storage);
+ StringRef b = B.toStringRef(b_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_a;
+ SmallVector<wchar_t, 128> wide_b;
+ if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
+ if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
+
+ AutoHandle HandleB(
+ ::CreateFileW(wide_b.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ AutoHandle HandleA(
+ ::CreateFileW(wide_a.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ // If both handles are invalid, it's an error.
+ if (HandleA == INVALID_HANDLE_VALUE &&
+ HandleB == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // If only one is invalid, it's false.
+  if (HandleA == INVALID_HANDLE_VALUE ||
+      HandleB == INVALID_HANDLE_VALUE) {
+ result = false;
+ return success;
+ }
+
+ // Get file information.
+ BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
+ if (!::GetFileInformationByHandle(HandleA, &InfoA))
+ return windows_error(::GetLastError());
+ if (!::GetFileInformationByHandle(HandleB, &InfoB))
+ return windows_error(::GetLastError());
+
+ // See if it's all the same.
+ result =
+ InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
+ InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
+ InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
+ InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
+ InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
+ InfoA.ftLastWriteTime.dwLowDateTime ==
+ InfoB.ftLastWriteTime.dwLowDateTime &&
+ InfoA.ftLastWriteTime.dwHighDateTime ==
+ InfoB.ftLastWriteTime.dwHighDateTime;
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ WIN32_FILE_ATTRIBUTE_DATA FileData;
+ if (!::GetFileAttributesExW(path_utf16.begin(),
+ ::GetFileExInfoStandard,
+ &FileData))
+ return windows_error(::GetLastError());
+
+ result =
+ (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ + FileData.nFileSizeLow;
+
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ StringRef path8 = path.toStringRef(path_storage);
+ // FIXME: We should detect as many "special file name" as possible.
+ if (path8.compare_lower("nul") == 0) {
+ result = file_status(file_type::character_file);
+ return success;
+ }
+
+ if (error_code ec = UTF8ToUTF16(path8,
+ path_utf16))
+ return ec;
+
+ DWORD attr = ::GetFileAttributesW(path_utf16.begin());
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ goto handle_status_error;
+
+ // Handle reparse points.
+ if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
+ AutoHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (h == INVALID_HANDLE_VALUE)
+ goto handle_status_error;
+ }
+
+ if (attr & FILE_ATTRIBUTE_DIRECTORY)
+ result = file_status(file_type::directory_file);
+ else
+ result = file_status(file_type::regular_file);
+
+ return success;
+
+handle_status_error:
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_not_found ||
+ ec == windows_error::path_not_found)
+ result = file_status(file_type::file_not_found);
+ else if (ec == windows_error::sharing_violation)
+ result = file_status(file_type::type_unknown);
+ else {
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ // Use result_path as temp storage.
+ result_path.set_size(0);
+ StringRef m = model.toStringRef(result_path);
+
+ SmallVector<wchar_t, 128> model_utf16;
+ if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(m);
+
+ if (!absolute) {
+ SmallVector<wchar_t, 64> temp_dir;
+ if (error_code ec = TempDir(temp_dir)) return ec;
+ // Handle c: by removing it.
+ if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+ model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ }
+ model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
+ }
+
+ // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallVector<wchar_t, 128> random_path_utf16;
+
+ // Get a Crypto Provider for CryptGenRandom.
+ HCRYPTPROV HCPC;
+ if (!::CryptAcquireContextW(&HCPC,
+ NULL,
+ NULL,
+ PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT))
+ return windows_error(::GetLastError());
+ ScopedCryptContext CryptoProvider(HCPC);
+
+retry_random_path:
+ random_path_utf16.set_size(0);
+ for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(),
+ e = model_utf16.end();
+ i != e; ++i) {
+ if (*i == L'%') {
+ BYTE val = 0;
+ if (!::CryptGenRandom(CryptoProvider, 1, &val))
+ return windows_error(::GetLastError());
+ random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+ }
+ else
+ random_path_utf16.push_back(*i);
+ }
+ // Make random_path_utf16 null terminated.
+ random_path_utf16.push_back(0);
+ random_path_utf16.pop_back();
+
+ // Try to create + open the path.
+retry_create_file:
+ HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
+ GENERIC_READ | GENERIC_WRITE,
+ FILE_SHARE_READ,
+ NULL,
+ // Return ERROR_FILE_EXISTS if the file
+ // already exists.
+ CREATE_NEW,
+ FILE_ATTRIBUTE_TEMPORARY,
+ NULL);
+ if (TempFileHandle == INVALID_HANDLE_VALUE) {
+ // If the file existed, try again, otherwise, error.
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_exists)
+ goto retry_random_path;
+ // Check for non-existing parent directories.
+ if (ec == windows_error::path_not_found) {
+ // Create the directories using result_path as temp storage.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path))
+ return ec;
+ StringRef p(result_path.begin(), result_path.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // If c: doesn't exist, bail.
+ if (i->endswith(":"))
+ return ec;
+
+ SmallVector<wchar_t, 64> dir_to_create_utf16;
+ if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16))
+ return ec;
+
+ // Create the directory.
+ if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL))
+ return windows_error(::GetLastError());
+ }
+ }
+ goto retry_create_file;
+ }
+ return ec;
+ }
+
+ // Set result_path to the utf-8 representation of the path.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path)) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ return ec;
+ }
+
+ // Convert the Windows API file handle into a C-runtime handle.
+ int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0);
+ if (fd == -1) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ // MSDN doesn't say anything about _open_osfhandle setting errno or
+ // GetLastError(), so just return invalid_handle.
+ return windows_error::invalid_handle;
+ }
+
+ result_fd = fd;
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+ result.set_size(0);
+
+ // Convert path to UTF-16.
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ // Open file.
+ HANDLE file = ::CreateFileW(c_str(path_utf16),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_READONLY,
+ NULL);
+ if (file == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // Allocate buffer.
+ result.reserve(len);
+
+ // Get magic!
+ DWORD bytes_read = 0;
+ BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
+ error_code ec = windows_error(::GetLastError());
+ ::CloseHandle(file);
+ if (!read_success || (bytes_read != len)) {
+ // Set result size to the number of bytes read if it's valid.
+ if (bytes_read <= len)
+ result.set_size(bytes_read);
+ // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
+ return ec;
+ }
+
+ result.set_size(len);
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path,
+ path_utf16))
+ return ec;
+
+ // Convert path to the format that Windows is happy with.
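+ // For example, "C:\foo" becomes "C:\foo\*" and "C:\foo\" becomes "C:\foo\*",
+ // which is the wildcard pattern FindFirstFileW expects when listing a
+ // directory's entries.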
+ if (path_utf16.size() > 0 &&
+ !is_separator(path_utf16[path.size() - 1]) &&
+ path_utf16[path.size() - 1] != L':') {
+ path_utf16.push_back(L'\\');
+ path_utf16.push_back(L'*');
+ } else {
+ path_utf16.push_back(L'*');
+ }
+
+ // Get the first directory entry.
+ WIN32_FIND_DATAW FirstFind;
+ ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
+ if (!FindHandle)
+ return windows_error(::GetLastError());
+
+ size_t FilenameLen = ::wcslen(FirstFind.cFileName);
+ while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
+ FirstFind.cFileName[1] == L'.'))
+ if (!::FindNextFileW(FindHandle, &FirstFind)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ } else
+ FilenameLen = ::wcslen(FirstFind.cFileName);
+
+ // Construct the current directory entry.
+ SmallString<128> directory_entry_name_utf8;
+ if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
+ ::wcslen(FirstFind.cFileName),
+ directory_entry_name_utf8))
+ return ec;
+
+ it.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> directory_entry_path(path);
+ path::append(directory_entry_path, directory_entry_name_utf8.str());
+ it.CurrentEntry = directory_entry(directory_entry_path.str());
+
+ return success;
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle != 0)
+ // Closes the handle if it's valid.
+ ScopedFindHandle close(HANDLE(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ WIN32_FIND_DATAW FindData;
+ if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ }
+
+ size_t FilenameLen = ::wcslen(FindData.cFileName);
+ if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
+ FindData.cFileName[1] == L'.'))
+ return directory_iterator_increment(it);
+
+ SmallString<128> directory_entry_path_utf8;
+ if (error_code ec = UTF16ToUTF8(FindData.cFileName,
+ ::wcslen(FindData.cFileName),
+ directory_entry_path_utf8))
+ return ec;
+
+ it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
new file mode 100644
index 000000000000..06a7f0054d50
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -0,0 +1,222 @@
+//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <psapi.h>
+#include <malloc.h>
+#include <io.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+ #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This ban should be lifted when MinGW 1.0+ has defined this value.
+# define _HEAPOK (-2)
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once in Process::GetPageSize to initialize the
+// static variable PageSize.
+inline unsigned GetPageSizeOnce() {
+ // NOTE: A 32-bit application running under WOW64 is supposed to use
+ // GetNativeSystemInfo. However, this interface is not present prior
+ // to Windows XP, so using it requires dynamic linking. It is not clear
+ // how this affects the reported page size, if at all. One could argue
+ // that LLVM ought to run as a 64-bit application on a 64-bit system, anyway.
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return static_cast<unsigned>(info.dwPageSize);
+}
+
+unsigned
+Process::GetPageSize() {
+ static const unsigned PageSize = GetPageSizeOnce();
+ return PageSize;
+}
+
+size_t
+Process::GetMallocUsage()
+{
+ _HEAPINFO hinfo;
+ hinfo._pentry = NULL;
+
+ size_t size = 0;
+
+ while (_heapwalk(&hinfo) == _HEAPOK)
+ size += hinfo._size;
+
+ return size;
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+ PROCESS_MEMORY_COUNTERS pmc;
+ GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
+ return pmc.PagefileUsage;
+}
+
+void
+Process::GetTimeUsage(
+ TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+
+ uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
+ GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
+ (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
+ (FILETIME*)&UserTime);
+
+ // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
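+ // For example, UserTime == 12,345,678 ticks is 1 second plus
+ // 2,345,678 * 100 = 234,567,800 nanoseconds.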
+ user_time.seconds( UserTime / 10000000 );
+ user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
+ sys_time.seconds( KernelTime / 10000000 );
+ sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
+}
+
+int Process::GetCurrentUserId()
+{
+ return 65536;
+}
+
+int Process::GetCurrentGroupId()
+{
+ return 65536;
+}
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this configuration item
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+ // Windows doesn't do core files, but it does do modal pop-up message
+ // boxes. As this method is used by bugpoint, preventing these pop-ups
+ // is the moral equivalent of suppressing core files.
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+}
+
+bool Process::StandardInIsUserInput() {
+ return FileDescriptorIsDisplayed(0);
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return FileDescriptorIsDisplayed(1);
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return FileDescriptorIsDisplayed(2);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+ DWORD Mode; // Unused
+ return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
+}
+
+unsigned Process::StandardOutColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+unsigned Process::StandardErrColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+// It always has colors.
+bool Process::StandardErrHasColors() {
+ return StandardErrIsDisplayed();
+}
+
+bool Process::StandardOutHasColors() {
+ return StandardOutIsDisplayed();
+}
+
+namespace {
+class DefaultColors
+{
+ private:
+ WORD defaultColor;
+ public:
+ DefaultColors()
+ :defaultColor(GetCurrentColor()) {}
+ static unsigned GetCurrentColor() {
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ return csbi.wAttributes;
+ return 0;
+ }
+ WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+ return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+ WORD colors = DefaultColors::GetCurrentColor();
+ if (bg)
+ colors |= BACKGROUND_INTENSITY;
+ else
+ colors |= FOREGROUND_INTENSITY;
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+ WORD colors;
+ if (bg) {
+ colors = ((code&1) ? BACKGROUND_RED : 0) |
+ ((code&2) ? BACKGROUND_GREEN : 0 ) |
+ ((code&4) ? BACKGROUND_BLUE : 0);
+ if (bold)
+ colors |= BACKGROUND_INTENSITY;
+ } else {
+ colors = ((code&1) ? FOREGROUND_RED : 0) |
+ ((code&2) ? FOREGROUND_GREEN : 0 ) |
+ ((code&4) ? FOREGROUND_BLUE : 0);
+ if (bold)
+ colors |= FOREGROUND_INTENSITY;
+ }
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+ return 0;
+}
+
+const char *Process::ResetColor() {
+ SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+ return 0;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
new file mode 100644
index 000000000000..e486e6ec2381
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -0,0 +1,405 @@
+//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct Win32ProcessInfo {
+ HANDLE hProcess;
+ DWORD dwProcessId;
+ };
+}
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+}
+
+unsigned Program::GetPid() const {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ return wpi->dwProcessId;
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // Return paths with slashes verbatim.
+ if (progName.find('\\') != std::string::npos ||
+ progName.find('/') != std::string::npos)
+ return temp;
+
+ // At this point, the file name is valid and does not contain slashes.
+ // Let Windows search for it.
+ char buffer[MAX_PATH];
+ char *dummy = NULL;
+ DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+ buffer, &dummy);
+
+ // See if it wasn't found.
+ if (len == 0)
+ return Path();
+
+ // See if we got the entire path.
+ if (len < MAX_PATH)
+ return Path(buffer);
+
+ // Buffer was too small; grow and retry.
+ while (true) {
+ char *b = reinterpret_cast<char *>(_alloca(len+1));
+ DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+ // It is unlikely the search failed, but it's always possible some file
+ // was added or removed since the last search, so be paranoid...
+ if (len2 == 0)
+ return Path();
+ else if (len2 <= len)
+ return Path(b);
+
+ len = len2;
+ }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
+ HANDLE h;
+ if (path == 0) {
+ DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+ GetCurrentProcess(), &h,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ return h;
+ }
+
+ const char *fname;
+ if (path->isEmpty())
+ fname = "NUL";
+ else
+ fname = path->c_str();
+
+ SECURITY_ATTRIBUTES sa;
+ sa.nLength = sizeof(sa);
+ sa.lpSecurityDescriptor = 0;
+ sa.bInheritHandle = TRUE;
+
+ h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+ &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+ (fd ? "input: " : "output: "));
+ }
+
+ return h;
+}
+
+/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess.
+static bool ArgNeedsQuotes(const char *Str) {
+ return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
+}
+
+
+/// ArgLenWithQuotes - Compute the length the argument will occupy on the
+/// CreateProcess command line, including surrounding quotes (if needed) and
+/// any escaped embedded quotes.
+static unsigned int ArgLenWithQuotes(const char *Str) {
+ unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+
+ while (*Str != '\0') {
+ if (*Str == '\"')
+ ++len;
+
+ ++len;
+ ++Str;
+ }
+
+ return len;
+}
+
+
+bool
+Program::Execute(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ if (Data_) {
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ CloseHandle(wpi->hProcess);
+ delete wpi;
+ Data_ = 0;
+ }
+
+ if (!path.canExecute()) {
+ if (ErrMsg)
+ *ErrMsg = "program not executable";
+ return false;
+ }
+
+ // Windows wants a command line, not an array of args, to pass to the new
+ // process. We have to concatenate them all, while quoting the args that
+ // have embedded spaces (or are empty).
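+ // For illustration (hypothetical args): the argument vector
+ // {"clang", "a b", "say \"hi\""} is flattened below into the single string
+ // clang "a b" "say \"hi\"" (each argument followed by a space).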
+
+ // First, determine the length of the command line.
+ unsigned len = 0;
+ for (unsigned i = 0; args[i]; i++) {
+ len += ArgLenWithQuotes(args[i]) + 1;
+ }
+
+ // Now build the command line.
+ char *command = reinterpret_cast<char *>(_alloca(len+1));
+ char *p = command;
+
+ for (unsigned i = 0; args[i]; i++) {
+ const char *arg = args[i];
+
+ bool needsQuoting = ArgNeedsQuotes(arg);
+ if (needsQuoting)
+ *p++ = '"';
+
+ while (*arg != '\0') {
+ if (*arg == '\"')
+ *p++ = '\\';
+
+ *p++ = *arg++;
+ }
+
+ if (needsQuoting)
+ *p++ = '"';
+ *p++ = ' ';
+ }
+
+ *p = 0;
+
+ // The pointer to the environment block for the new process.
+ char *envblock = 0;
+
+ if (envp) {
+ // An environment block consists of a null-terminated block of
+ // null-terminated strings. Convert the array of environment variables to
+ // an environment block by concatenating them.
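+ // For illustration (hypothetical variables): { "PATH=C:\bin", "TMP=C:\tmp" }
+ // is laid out as "PATH=C:\bin\0TMP=C:\tmp\0\0", where the final extra NUL
+ // terminates the block as a whole.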
+
+ // First, determine the length of the environment block.
+ len = 0;
+ for (unsigned i = 0; envp[i]; i++)
+ len += strlen(envp[i]) + 1;
+
+ // Now build the environment block.
+ envblock = reinterpret_cast<char *>(_alloca(len+1));
+ p = envblock;
+
+ for (unsigned i = 0; envp[i]; i++) {
+ const char *ev = envp[i];
+ size_t len = strlen(ev) + 1;
+ memcpy(p, ev, len);
+ p += len;
+ }
+
+ *p = 0;
+ }
+
+ // Create a child process.
+ STARTUPINFO si;
+ memset(&si, 0, sizeof(si));
+ si.cb = sizeof(si);
+ si.hStdInput = INVALID_HANDLE_VALUE;
+ si.hStdOutput = INVALID_HANDLE_VALUE;
+ si.hStdError = INVALID_HANDLE_VALUE;
+
+ if (redirects) {
+ si.dwFlags = STARTF_USESTDHANDLES;
+
+ si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+ if (si.hStdInput == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, "can't redirect stdin");
+ return false;
+ }
+ si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+ if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ MakeErrMsg(ErrMsg, "can't redirect stdout");
+ return false;
+ }
+ if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the handle already open for stdout.
+ DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+ GetCurrentProcess(), &si.hStdError,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ } else {
+ // Just redirect stderr
+ si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+ if (si.hStdError == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ MakeErrMsg(ErrMsg, "can't redirect stderr");
+ return false;
+ }
+ }
+ }
+
+ PROCESS_INFORMATION pi;
+ memset(&pi, 0, sizeof(pi));
+
+ fflush(stdout);
+ fflush(stderr);
+ BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+ envblock, NULL, &si, &pi);
+ DWORD err = GetLastError();
+
+ // Regardless of whether the process got created or not, we are done with
+ // the handles we created for it to inherit.
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ CloseHandle(si.hStdError);
+
+ // Now return an error if the process didn't get created.
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+ path.str() + "'");
+ return false;
+ }
+ Win32ProcessInfo* wpi = new Win32ProcessInfo;
+ wpi->hProcess = pi.hProcess;
+ wpi->dwProcessId = pi.dwProcessId;
+ Data_ = wpi;
+
+ // Make sure these get closed no matter what.
+ AutoHandle hThread(pi.hThread);
+
+ // Assign the process to a job if a memory limit is defined.
+ AutoHandle hJob(0);
+ if (memoryLimit != 0) {
+ hJob = CreateJobObject(0, 0);
+ bool success = false;
+ if (hJob != 0) {
+ JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+ memset(&jeli, 0, sizeof(jeli));
+ jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
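+ // memoryLimit is given in megabytes; 1048576 (2^20) converts it to the
+ // byte count the job object expects (e.g. 256 -> 268435456).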
+ jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+ if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+ &jeli, sizeof(jeli))) {
+ if (AssignProcessToJobObject(hJob, pi.hProcess))
+ success = true;
+ }
+ }
+ if (!success) {
+ SetLastError(GetLastError());
+ MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+ TerminateProcess(pi.hProcess, 1);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int
+Program::Wait(const Path &path,
+ unsigned secondsToWait,
+ std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return -1;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+
+ // Wait for the process to terminate.
+ DWORD millisecondsToWait = INFINITE;
+ if (secondsToWait > 0)
+ millisecondsToWait = secondsToWait * 1000;
+
+ if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+ if (!TerminateProcess(hProcess, 1)) {
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+ WaitForSingleObject(hProcess, INFINITE);
+ }
+
+ // Get its exit status.
+ DWORD status;
+ BOOL rc = GetExitCodeProcess(hProcess, &status);
+ DWORD err = GetLastError();
+
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, "Failed getting status for program.");
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ return -2;
+ }
+
+ return status;
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+ if (Data_ == 0) {
+ MakeErrMsg(ErrMsg, "Process not started!");
+ return true;
+ }
+
+ Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+ HANDLE hProcess = wpi->hProcess;
+ if (TerminateProcess(hProcess, 1) == 0) {
+ MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+ return true;
+ }
+
+ return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+ int result = _setmode( _fileno(stdin), _O_BINARY );
+ return result == -1;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ int result = _setmode( _fileno(stdout), _O_BINARY );
+ return result == -1;
+}
+
+bool Program::ChangeStderrToBinary(){
+ int result = _setmode( _fileno(stderr), _O_BINARY );
+ return result == -1;
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc
new file mode 100644
index 000000000000..471f8fa294be
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc
@@ -0,0 +1,58 @@
+//= llvm/Support/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
+// real reader-writer locks, you need a threads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
new file mode 100644
index 000000000000..14f3f21f02a1
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -0,0 +1,328 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+ #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static bool ExitOnUnhandledExceptions = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs. This causes concurrency issues with the above
+// globals, which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+ // Don't cause a DebugBreak() on return.
+ if (Return)
+ *Return = 0;
+
+ switch (ReportType) {
+ default:
+ case _CRT_ASSERT:
+ fprintf(stderr, "CRT assert: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_ERROR:
+ fprintf(stderr, "CRT error: %s\n", Message);
+ // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+ // exception code? Perhaps SetErrorMode() handles this.
+ _exit(3);
+ break;
+ case _CRT_WARN:
+ fprintf(stderr, "CRT warn: %s\n", Message);
+ break;
+ }
+
+ // Don't call _CrtDbgReport.
+ return TRUE;
+}
+#endif
+
+static void RegisterHandler() {
+ if (RegisteredUnhandledExceptionFilter) {
+ EnterCriticalSection(&CriticalSection);
+ return;
+ }
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+
+ // Enter it immediately. Now if someone hits CTRL/C, the console handler
+ // can't proceed until the globals are updated.
+ EnterCriticalSection(&CriticalSection);
+
+ RegisteredUnhandledExceptionFilter = true;
+ OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+ SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+#ifdef _MSC_VER
+ _CrtSetReportHook(CRTReportHook);
+#endif
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+ ExitOnUnhandledExceptions = true;
+ }
+
+ // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+ // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+ RegisterHandler();
+
+ if (CleanupExecuted) {
+ if (ErrMsg)
+ *ErrMsg = "Process terminating -- cannot register for removal";
+ return true;
+ }
+
+ if (FilesToRemove == NULL)
+ FilesToRemove = new std::vector<sys::Path>;
+
+ FilesToRemove->push_back(Filename);
+
+ LeaveCriticalSection(&CriticalSection);
+ return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ if (FilesToRemove == NULL)
+ return;
+
+ RegisterHandler();
+
+ FilesToRemove->push_back(Filename);
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ if (I != FilesToRemove->rend())
+ FilesToRemove->erase(I.base()-1);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+ RegisterHandler();
+ InterruptFunction = IF;
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ if (CallBacksToRun == 0)
+ CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+}
+
+static void Cleanup() {
+ EnterCriticalSection(&CriticalSection);
+
+ // Prevent other threads from registering new files and directories for
+ // removal, should we be executing because of the console handler callback.
+ CleanupExecuted = true;
+
+ // FIXME: open files cannot be deleted.
+
+ if (FilesToRemove != NULL)
+ while (!FilesToRemove->empty()) {
+ FilesToRemove->back().eraseFromDisk();
+ FilesToRemove->pop_back();
+ }
+
+ if (CallBacksToRun)
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+ Cleanup();
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+ Cleanup();
+
+#ifdef _WIN64
+ // TODO: provide an x64-friendly version of the following
+#else
+
+ // Initialize the STACKFRAME structure.
+ STACKFRAME StackFrame;
+ memset(&StackFrame, 0, sizeof(StackFrame));
+
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ HANDLE hProcess = GetCurrentProcess();
+ HANDLE hThread = GetCurrentThread();
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess,
+ SymGetModuleBase, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ // Print the PC in hexadecimal.
+ DWORD PC = StackFrame.AddrPC.Offset;
+ fprintf(stderr, "%08lX", PC);
+
+ // Print the parameters. Assume there are four.
+ fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ StackFrame.Params[0],
+ StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase(hProcess, PC)) {
+ fputs(" <unknown module>\n", stderr);
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+ DWORD dwDisp;
+ if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+ fputc('\n', stderr);
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+ else
+ fprintf(stderr, ", %s", symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE line;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+ fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwDisp > 0)
+ fprintf(stderr, "+%04lu byte(s)", dwDisp);
+ }
+
+ fputc('\n', stderr);
+ }
+
+#endif
+
+ if (ExitOnUnhandledExceptions)
+ _exit(-3);
+
+ // Allow a dialog box to pop up, offering the choice to start a debugger.
+ if (OldFilter)
+ return (*OldFilter)(ep);
+ else
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+ // We are running in our very own thread, courtesy of Windows.
+ EnterCriticalSection(&CriticalSection);
+ Cleanup();
+
+ // If an interrupt function has been set, go and run it; otherwise,
+ // the process dies.
+ void (*IF)() = InterruptFunction;
+ InterruptFunction = 0; // Don't run it on another CTRL-C.
+
+ if (IF) {
+ // Note: if the interrupt function throws an exception, there is nothing
+ // to catch it in this thread so it will kill the process.
+ IF(); // Run it now.
+ LeaveCriticalSection(&CriticalSection);
+ return TRUE; // Don't kill the process.
+ }
+
+ // Allow normal processing to take place; i.e., the process dies.
+ LeaveCriticalSection(&CriticalSection);
+ return FALSE;
+}
diff --git a/contrib/llvm/lib/Support/Windows/ThreadLocal.inc b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
new file mode 100644
index 000000000000..512462d89005
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
@@ -0,0 +1,54 @@
+//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/ThreadLocal.h"
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() {
+ DWORD* tls = new DWORD;
+ *tls = TlsAlloc();
+ assert(*tls != TLS_OUT_OF_INDEXES);
+ data = tls;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+ DWORD* tls = static_cast<DWORD*>(data);
+ TlsFree(*tls);
+ delete tls;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+ DWORD* tls = static_cast<DWORD*>(data);
+ return TlsGetValue(*tls);
+}
+
+void ThreadLocalImpl::setInstance(const void* d){
+ DWORD* tls = static_cast<DWORD*>(data);
+ int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
+ assert(errorcode != 0);
+ (void)errorcode;
+}
+
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
new file mode 100644
index 000000000000..12275526f1c8
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+ uint64_t ft;
+ GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+ TimeValue t(0, 0);
+ t.fromWin32Time(ft);
+ return t;
+}
+
+std::string TimeValue::str() const {
+#ifdef __MINGW32__
+ // This ban may be lifted by either:
+ // (i) a future MinGW version other than 1.0 that inherits the __time64_t type, or
+ // (ii) configure tests for either the time_t or __time64_t type.
+ time_t ourTime = time_t(this->toEpochTime());
+ struct tm *lt = ::localtime(&ourTime);
+#else
+ __time64_t ourTime = this->toEpochTime();
+ struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+ char buffer[25];
+ strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+ return std::string(buffer);
+}
+
+
+}
diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h
new file mode 100644
index 000000000000..4a1553b599d7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Windows.h
@@ -0,0 +1,120 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// mingw-w64 tends to define it as 0x0502 in its headers.
+#undef _WIN32_WINNT
+
+// Require at least Windows 2000 API.
+#define _WIN32_WINNT 0x0500
+#define _WIN32_IE 0x0500 // MinGW at it again.
+#define WIN32_LEAN_AND_MEAN
+
+#include "llvm/Config/config.h" // Get build system configuration settings
+#include <windows.h>
+#include <shlobj.h>
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ *ErrMsg = prefix + buffer;
+ LocalFree(buffer);
+ return true;
+}
+
+class AutoHandle {
+ HANDLE handle;
+
+public:
+ AutoHandle(HANDLE h) : handle(h) {}
+
+ ~AutoHandle() {
+ if (handle)
+ CloseHandle(handle);
+ }
+
+ operator HANDLE() {
+ return handle;
+ }
+
+ AutoHandle &operator=(HANDLE h) {
+ handle = h;
+ return *this;
+ }
+};
+
+template <class HandleType, uintptr_t InvalidHandle,
+ class DeleterType, DeleterType D>
+class ScopedHandle {
+ HandleType Handle;
+
+public:
+ ScopedHandle() : Handle(InvalidHandle) {}
+ ScopedHandle(HandleType handle) : Handle(handle) {}
+
+ ~ScopedHandle() {
+ if (Handle != HandleType(InvalidHandle))
+ D(Handle);
+ }
+
+ HandleType take() {
+ HandleType temp = Handle;
+ Handle = HandleType(InvalidHandle);
+ return temp;
+ }
+
+ operator HandleType() const { return Handle; }
+
+ ScopedHandle &operator=(HandleType handle) {
+ Handle = handle;
+ return *this;
+ }
+
+ typedef void (*unspecified_bool_type)();
+ static void unspecified_bool_true() {}
+
+ // True if Handle is valid.
+ operator unspecified_bool_type() const {
+ return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ }
+
+ bool operator!() const {
+ return Handle == HandleType(InvalidHandle);
+ }
+};
+
+typedef ScopedHandle<HANDLE, uintptr_t(-1),
+ BOOL (WINAPI*)(HANDLE), ::FindClose>
+ ScopedFindHandle;
+
+namespace llvm {
+template <class T>
+class SmallVectorImpl;
+
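+// c_str - Return a NUL-terminated pointer to the vector's data without
+// changing its size: push a trailing 0 so the storage is terminated, then
+// pop it back off before returning data().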
+template <class T>
+typename SmallVectorImpl<T>::const_pointer
+c_str(SmallVectorImpl<T> &str) {
+ str.push_back(0);
+ str.pop_back();
+ return str.data();
+}
+} // end namespace llvm.
diff --git a/contrib/llvm/lib/Support/Windows/explicit_symbols.inc b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
new file mode 100644
index 000000000000..379645d2ff60
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
@@ -0,0 +1,66 @@
+/* in libgcc.a */
+
+#ifdef HAVE__ALLOCA
+ EXPLICIT_SYMBOL(_alloca)
+ EXPLICIT_SYMBOL2(alloca, _alloca)
+#endif
+#ifdef HAVE___ALLOCA
+ EXPLICIT_SYMBOL(__alloca)
+#endif
+#ifdef HAVE___CHKSTK
+ EXPLICIT_SYMBOL(__chkstk)
+#endif
+#ifdef HAVE____CHKSTK
+ EXPLICIT_SYMBOL(___chkstk)
+#endif
+#ifdef HAVE___MAIN
+ EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
+#endif
+
+#ifdef HAVE___ASHLDI3
+ EXPLICIT_SYMBOL(__ashldi3)
+#endif
+#ifdef HAVE___ASHRDI3
+ EXPLICIT_SYMBOL(__ashrdi3)
+#endif
+#ifdef HAVE___CMPDI2 // FIXME: unused
+ EXPLICIT_SYMBOL(__cmpdi2)
+#endif
+#ifdef HAVE___DIVDI3
+ EXPLICIT_SYMBOL(__divdi3)
+#endif
+#ifdef HAVE___FIXDFDI
+ EXPLICIT_SYMBOL(__fixdfdi)
+#endif
+#ifdef HAVE___FIXSFDI
+ EXPLICIT_SYMBOL(__fixsfdi)
+#endif
+#ifdef HAVE___FIXUNSDFDI
+ EXPLICIT_SYMBOL(__fixunsdfdi)
+#endif
+#ifdef HAVE___FIXUNSSFDI
+ EXPLICIT_SYMBOL(__fixunssfdi)
+#endif
+#ifdef HAVE___FLOATDIDF
+ EXPLICIT_SYMBOL(__floatdidf)
+#endif
+#ifdef HAVE___FLOATDISF
+ EXPLICIT_SYMBOL(__floatdisf)
+#endif
+#ifdef HAVE___LSHRDI3
+ EXPLICIT_SYMBOL(__lshrdi3)
+#endif
+#ifdef HAVE___MODDI3
+ EXPLICIT_SYMBOL(__moddi3)
+#endif
+#ifdef HAVE___UDIVDI3
+ EXPLICIT_SYMBOL(__udivdi3)
+#endif
+#ifdef HAVE___UMODDI3
+ EXPLICIT_SYMBOL(__umoddi3)
+#endif
+
+/* msvcrt */
+#if defined(_MSC_VER)
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe)
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/system_error.inc b/contrib/llvm/lib/Support/Windows/system_error.inc
new file mode 100644
index 000000000000..37ec81dd363c
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/system_error.inc
@@ -0,0 +1,142 @@
+//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include <windows.h>
+#include <winerror.h>
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ LPVOID lpMsgBuf = 0;
+ DWORD retval = ::FormatMessageA(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ ev,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+ (LPSTR) &lpMsgBuf,
+ 0,
+ NULL);
+ if (retval == 0) {
+ ::LocalFree(lpMsgBuf);
+ return std::string("Unknown error");
+ }
+
+ std::string str( static_cast<LPCSTR>(lpMsgBuf) );
+ ::LocalFree(lpMsgBuf);
+
+ while (str.size()
+ && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
+ str.erase( str.size()-1 );
+ if (str.size() && str[str.size()-1] == '.')
+ str.erase( str.size()-1 );
+ return str;
+}
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
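+// For example, MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied) expands
+// to: case ERROR_ACCESS_DENIED: return make_error_condition(errc::permission_denied);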
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+ switch (ev) {
+ MAP_ERR_TO_COND(0, success);
+ // Windows system -> posix_errno decode table ---------------------------//
+ // see WinError.h comments for descriptions of errors
+ MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
+ MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
+ MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
+ MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
+ MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
+ MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
+ MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
+ MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large);
+ MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
+ MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link);
+ MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
+ MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled);
+ MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_SEEK, io_error);
+ MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
+ MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+ MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
+ MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEACCES, permission_denied);
+ MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use);
+ MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available);
+ MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported);
+ MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress);
+ MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
+ MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted);
+ MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused);
+ MAP_ERR_TO_COND(WSAECONNRESET, connection_reset);
+ MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required);
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address);
+ MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable);
+ MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress);
+ MAP_ERR_TO_COND(WSAEINTR, interrupted);
+ MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
+ MAP_ERR_TO_COND(WSAEISCONN, already_connected);
+ MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
+ MAP_ERR_TO_COND(WSAEMSGSIZE, message_size);
+ MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
+ MAP_ERR_TO_COND(WSAENETDOWN, network_down);
+ MAP_ERR_TO_COND(WSAENETRESET, network_reset);
+ MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable);
+ MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space);
+ MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option);
+ MAP_ERR_TO_COND(WSAENOTCONN, not_connected);
+ MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket);
+ MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type);
+ MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block);
+ default: return error_condition(ev, system_category());
+ }
+}
diff --git a/contrib/llvm/lib/Support/circular_raw_ostream.cpp b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
new file mode 100644
index 000000000000..ca0d30db388c
--- /dev/null
+++ b/contrib/llvm/lib/Support/circular_raw_ostream.cpp
@@ -0,0 +1,45 @@
+//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for circular buffered streams.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/circular_raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) {
+ if (BufferSize == 0) {
+ TheStream->write(Ptr, Size);
+ return;
+ }
+
+ // Write into the buffer, wrapping if necessary.
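+ // For example, with BufferSize == 8, Cur at BufferArray, and Size == 11, the
+ // first pass copies 8 bytes and wraps Cur back to BufferArray; the second
+ // pass copies the remaining 3 bytes over the oldest data.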
+ while (Size != 0) {
+ unsigned Bytes =
+ std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray)));
+ memcpy(Cur, Ptr, Bytes);
+ Size -= Bytes;
+ Cur += Bytes;
+ if (Cur == BufferArray + BufferSize) {
+ // Reset the output pointer to the start of the buffer.
+ Cur = BufferArray;
+ Filled = true;
+ }
+ }
+}
+
+void circular_raw_ostream::flushBufferWithBanner() {
+ if (BufferSize != 0) {
+ // Write out the buffer
+ TheStream->write(Banner, std::strlen(Banner));
+ flushBuffer();
+ }
+}
diff --git a/contrib/llvm/lib/Support/raw_os_ostream.cpp b/contrib/llvm/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 000000000000..44f2325d7f8a
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support adapting raw_ostream to std::ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+ flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); }
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
new file mode 100644
index 000000000000..5a71fa3d8cea
--- /dev/null
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -0,0 +1,763 @@
+//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for bulk buffered stream output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cctype>
+#include <cerrno>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(HAVE_FCNTL_H)
+# include <fcntl.h>
+#endif
+#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV)
+# include <sys/uio.h>
+#endif
+
+#if defined(__CYGWIN__)
+#include <io.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <io.h>
+#include <fcntl.h>
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+# define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+# define STDERR_FILENO 2
+#endif
+#endif
+
+using namespace llvm;
+
+raw_ostream::~raw_ostream() {
+ // raw_ostream's subclasses should take care to flush the buffer
+ // in their destructors.
+ assert(OutBufCur == OutBufStart &&
+ "raw_ostream destructor called with non-empty buffer!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+}
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_ostream::handle() {}
+
+size_t raw_ostream::preferred_buffer_size() const {
+ // BUFSIZ is intended to be a reasonable default.
+ return BUFSIZ;
+}
+
+void raw_ostream::SetBuffered() {
+ // Ask the subclass to determine an appropriate buffer size.
+ if (size_t Size = preferred_buffer_size())
+ SetBufferSize(Size);
+ else
+ // It may return 0, meaning this stream should be unbuffered.
+ SetUnbuffered();
+}
+
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+ BufferKind Mode) {
+ assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+ (Mode != Unbuffered && BufferStart && Size)) &&
+ "stream must be unbuffered or have at least one byte");
+ // Make sure the current buffer is free of content (we can't flush here; the
+ // child buffer management logic will be in write_impl).
+ assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
+
+ if (BufferMode == InternalBuffer)
+ delete [] OutBufStart;
+ OutBufStart = BufferStart;
+ OutBufEnd = OutBufStart+Size;
+ OutBufCur = OutBufStart;
+ BufferMode = Mode;
+
+ assert(OutBufStart <= OutBufEnd && "Invalid size!");
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long N) {
+ if (N < 0) {
+ *this << '-';
+ N = -N;
+ }
+
+ return this->operator<<(static_cast<unsigned long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long long N) {
+ // Output using 32-bit div/mod when possible.
+ if (N == static_cast<unsigned long>(N))
+ return this->operator<<(static_cast<unsigned long>(N));
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long long N) {
+ if (N < 0) {
+ *this << '-';
+ // Avoid undefined behavior on INT64_MIN with a cast.
+ N = -(unsigned long long)N;
+ }
+
+ return this->operator<<(static_cast<unsigned long long>(N));
+}
+
+raw_ostream &raw_ostream::write_hex(unsigned long long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ uintptr_t x = N % 16;
+ *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
+ N /= 16;
+ }
+
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::write_escaped(StringRef Str,
+ bool UseHexEscapes) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char c = Str[i];
+
+ switch (c) {
+ case '\\':
+ *this << '\\' << '\\';
+ break;
+ case '\t':
+ *this << '\\' << 't';
+ break;
+ case '\n':
+ *this << '\\' << 'n';
+ break;
+ case '"':
+ *this << '\\' << '"';
+ break;
+ default:
+ if (std::isprint(c)) {
+ *this << c;
+ break;
+ }
+
+ // Write out the escaped representation.
+ if (UseHexEscapes) {
+ *this << '\\' << 'x';
+ *this << hexdigit((c >> 4 & 0xF));
+ *this << hexdigit((c >> 0) & 0xF);
+ } else {
+ // Always use a full 3-character octal escape.
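+ // For example, the BEL character 0x07 is written as \007.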
+ *this << '\\';
+ *this << char('0' + ((c >> 6) & 7));
+ *this << char('0' + ((c >> 3) & 7));
+ *this << char('0' + ((c >> 0) & 7));
+ }
+ }
+ }
+
+ return *this;
+}
+
+raw_ostream &raw_ostream::operator<<(const void *P) {
+ *this << '0' << 'x';
+
+ return write_hex((uintptr_t) P);
+}
+
+raw_ostream &raw_ostream::operator<<(double N) {
+#ifdef _WIN32
+ // On MSVCRT and compatible, output of %e is incompatible with POSIX by
+ // default: POSIX wants at least two exponent digits ("%+03d"), while MSVCRT
+ // prints three, so the extra leading zero is trimmed below.
+ // FIXME: Implement our own formatter here or in Support/Format.h!
+ int fpcl = _fpclass(N);
+
+ // negative zero
+ if (fpcl == _FPCLASS_NZ)
+ return *this << "-0.000000e+00";
+
+ char buf[16];
+ unsigned len;
+ len = snprintf(buf, sizeof(buf), "%e", N);
+ if (len <= sizeof(buf) - 2) {
+ if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
+ int cs = buf[len - 4];
+ if (cs == '+' || cs == '-') {
+ int c1 = buf[len - 2];
+ int c0 = buf[len - 1];
+ if (isdigit(c1) && isdigit(c0)) {
+ // Trim leading '0': "...e+012" -> "...e+12\0"
+ buf[len - 3] = c1;
+ buf[len - 2] = c0;
+ buf[--len] = 0;
+ }
+ }
+ }
+ return this->operator<<(buf);
+ }
+#endif
+ return this->operator<<(format("%e", N));
+}
+
+
+
+void raw_ostream::flush_nonempty() {
+ assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
+ size_t Length = OutBufCur - OutBufStart;
+ OutBufCur = OutBufStart;
+ write_impl(OutBufStart, Length);
+}
+
+raw_ostream &raw_ostream::write(unsigned char C) {
+ // Group exceptional cases into a single branch.
+ if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(reinterpret_cast<char*>(&C), 1);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(C);
+ }
+
+ flush_nonempty();
+ }
+
+ *OutBufCur++ = C;
+ return *this;
+}
+
+raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
+ // Group exceptional cases into a single branch.
+ if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(!OutBufStart, false)) {
+ if (BufferMode == Unbuffered) {
+ write_impl(Ptr, Size);
+ return *this;
+ }
+ // Set up a buffer and start over.
+ SetBuffered();
+ return write(Ptr, Size);
+ }
+
+ size_t NumBytes = OutBufEnd - OutBufCur;
+
+ // If the buffer is empty at this point we have a string that is larger
+ // than the buffer. Directly write the chunk that is a multiple of the
+ // preferred buffer size and put the remainder in the buffer.
+ if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) {
+ size_t BytesToWrite = Size - (Size % NumBytes);
+ write_impl(Ptr, BytesToWrite);
+ copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ return *this;
+ }
+
+ // We don't have enough space in the buffer to fit the string in. Insert as
+ // much as possible, flush and start over with the remainder.
+ copy_to_buffer(Ptr, NumBytes);
+ flush_nonempty();
+ return write(Ptr + NumBytes, Size - NumBytes);
+ }
+
+ copy_to_buffer(Ptr, Size);
+
+ return *this;
+}
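+
+// Illustrative trace of the oversized-write path, assuming a 4096-byte
+// buffer that is currently empty: write(Ptr, 10000) sees NumBytes = 4096,
+// computes BytesToWrite = 10000 - (10000 % 4096) = 8192, hands those 8192
+// bytes straight to write_impl(), and buffers the remaining 1808 bytes.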
+
+void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
+ assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");
+
+ // Handle short strings specially; memcpy isn't very good at very short
+ // strings.
+ switch (Size) {
+ case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
+ case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
+ case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
+ case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+ case 0: break;
+ default:
+ memcpy(OutBufCur, Ptr, Size);
+ break;
+ }
+
+ OutBufCur += Size;
+}
+
+// Formatted output.
+raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
+ // If we have more than a few bytes left in our output buffer, try
+ // formatting directly onto its end.
+ size_t NextBufferSize = 127;
+ size_t BufferBytesLeft = OutBufEnd - OutBufCur;
+ if (BufferBytesLeft > 3) {
+ size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+
+ // Common case is that we have plenty of space.
+ if (BytesUsed <= BufferBytesLeft) {
+ OutBufCur += BytesUsed;
+ return *this;
+ }
+
+ // Otherwise, we overflowed and the return value tells us the size to try
+ // again with.
+ NextBufferSize = BytesUsed;
+ }
+
+ // If we got here, we didn't have enough space in the output buffer for the
+ // string. Try printing into a SmallVector that is resized to have enough
+ // space. Iterate until we win.
+ SmallVector<char, 128> V;
+
+ while (1) {
+ V.resize(NextBufferSize);
+
+ // Try formatting into the SmallVector.
+ size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
+
+ // If BytesUsed fit into the vector, we win.
+ if (BytesUsed <= NextBufferSize)
+ return write(V.data(), BytesUsed);
+
+ // Otherwise, try again with a new size.
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
+ NextBufferSize = BytesUsed;
+ }
+}
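+
+// Example use (illustrative; format() is the helper from Support/Format.h
+// already used above, and PassName/Percent are hypothetical values):
+//   OS << format("%-10s %5.2f%%\n", PassName, Percent);
+// If the formatted text does not fit in the remaining buffer space, it is
+// retried into a growing SmallVector, so long results are never truncated.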
+
+/// indent - Insert 'NumSpaces' spaces.
+raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
+ static const char Spaces[] = " "
+ " "
+ " ";
+
+ // Usually the indentation is small, handle it with a fastpath.
+ if (NumSpaces < array_lengthof(Spaces))
+ return write(Spaces, NumSpaces);
+
+ while (NumSpaces) {
+ unsigned NumToWrite = std::min(NumSpaces,
+ (unsigned)array_lengthof(Spaces)-1);
+ write(Spaces, NumToWrite);
+ NumSpaces -= NumToWrite;
+ }
+ return *this;
+}
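+
+// Example use (illustrative sketch; Depth is a hypothetical nesting level):
+//   OS.indent(Depth * 2) << "visiting node\n";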
+
+
+//===----------------------------------------------------------------------===//
+// Formatted Output
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void format_object_base::home() {
+}
+
+//===----------------------------------------------------------------------===//
+// raw_fd_ostream
+//===----------------------------------------------------------------------===//
+
+/// raw_fd_ostream - Open the specified file for writing. If an error
+/// occurs, information about the error is put into ErrorInfo, and the
+/// stream should be immediately destroyed; the string will be empty
+/// if no error occurred.
+raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Error(false), UseAtomicWrites(false), pos(0)
+{
+ assert(Filename != 0 && "Filename is null");
+ // Verify that we don't have both "append" and "excl".
+ assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
+ "Cannot specify both 'excl' and 'append' file creation flags!");
+
+ ErrorInfo.clear();
+
+ // Handle "-" as stdout. Note that when we do this, we consider ourselves
+ // the owner of stdout. This means that we can do things like close the
+ // file descriptor when we're done and set the "binary" flag globally.
+ if (Filename[0] == '-' && Filename[1] == 0) {
+ FD = STDOUT_FILENO;
+ // If the user requested binary output, put stdout into binary mode if
+ // possible.
+ if (Flags & F_Binary)
+ sys::Program::ChangeStdoutToBinary();
+ // Close stdout when we're done, to detect any output errors.
+ ShouldClose = true;
+ return;
+ }
+
+ int OpenFlags = O_WRONLY|O_CREAT;
+#ifdef O_BINARY
+ if (Flags & F_Binary)
+ OpenFlags |= O_BINARY;
+#endif
+
+ if (Flags & F_Append)
+ OpenFlags |= O_APPEND;
+ else
+ OpenFlags |= O_TRUNC;
+ if (Flags & F_Excl)
+ OpenFlags |= O_EXCL;
+
+ while ((FD = open(Filename, OpenFlags, 0664)) < 0) {
+ if (errno != EINTR) {
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ShouldClose = false;
+ return;
+ }
+ }
+
+ // Ok, we successfully opened the file, so it'll need to be closed.
+ ShouldClose = true;
+}
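+
+// Example use (an illustrative sketch, not taken from this file):
+//   std::string Err;
+//   raw_fd_ostream Out("results.txt", Err, /*Flags=*/0);
+//   if (!Err.empty()) {
+//     // Open failed; report Err and bail out.
+//   }
+//   Out << "done\n";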
+
+/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
+/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
+ : raw_ostream(unbuffered), FD(fd),
+ ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
+#ifdef O_BINARY
+ // Setting STDOUT and STDERR to binary mode is necessary in Win32
+ // to avoid undesirable linefeed conversion.
+ if (fd == STDOUT_FILENO || fd == STDERR_FILENO)
+ setmode(fd, O_BINARY);
+#endif
+
+ // Get the starting position.
+ off_t loc = ::lseek(FD, 0, SEEK_CUR);
+ if (loc == (off_t)-1)
+ pos = 0;
+ else
+ pos = static_cast<uint64_t>(loc);
+}
+
+raw_fd_ostream::~raw_fd_ostream() {
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ }
+
+#ifdef __MINGW32__
+ // On mingw, global dtors should not call exit(), and
+ // report_fatal_error() invokes exit(). In addition, report_fatal_error()
+ // might not be able to write its message to stderr when errors were
+ // detected on FD == 2, so skip the error check for stderr here.
+ if (FD == 2) return;
+#endif
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid report_fatal_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (has_error())
+ report_fatal_error("IO failure on output stream.");
+}
+
+
+void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
+ assert(FD >= 0 && "File already closed.");
+ pos += Size;
+
+ do {
+ ssize_t ret;
+
+ // Check whether we should attempt to use atomic writes.
+ if (BUILTIN_EXPECT(!UseAtomicWrites, true)) {
+ ret = ::write(FD, Ptr, Size);
+ } else {
+ // Use ::writev() where available.
+#if defined(HAVE_WRITEV)
+ struct iovec IOV = { (void*) Ptr, Size };
+ ret = ::writev(FD, &IOV, 1);
+#else
+ ret = ::write(FD, Ptr, Size);
+#endif
+ }
+
+ if (ret < 0) {
+ // If it's a recoverable error, swallow it and retry the write.
+ //
+ // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+ // raw_ostream isn't designed to do non-blocking I/O. However, some
+ // programs, such as old versions of bjam, have mistakenly used
+ // O_NONBLOCK. For compatibility, emulate blocking semantics by
+ // spinning until the write succeeds. If you don't want spinning,
+ // don't use O_NONBLOCK file descriptors with raw_ostream.
+ if (errno == EINTR || errno == EAGAIN
+#ifdef EWOULDBLOCK
+ || errno == EWOULDBLOCK
+#endif
+ )
+ continue;
+
+ // Otherwise it's a non-recoverable error. Note it and quit.
+ error_detected();
+ break;
+ }
+
+ // The write may have written some or all of the data. Update the
+ // size and buffer pointer to reflect the remainder that needs
+ // to be written. If there are no bytes left, we're done.
+ Ptr += ret;
+ Size -= ret;
+ } while (Size > 0);
+}
+
+void raw_fd_ostream::close() {
+ assert(ShouldClose);
+ ShouldClose = false;
+ flush();
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ FD = -1;
+}
+
+uint64_t raw_fd_ostream::seek(uint64_t off) {
+ flush();
+ pos = ::lseek(FD, off, SEEK_SET);
+ if (pos != off)
+ error_detected();
+ return pos;
+}
+
+size_t raw_fd_ostream::preferred_buffer_size() const {
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
+ // Windows and Minix have no st_blksize.
+ assert(FD >= 0 && "File not yet open!");
+ struct stat statbuf;
+ if (fstat(FD, &statbuf) != 0)
+ return 0;
+
+ // If this is a terminal, don't use buffering. Line buffering
+ // would be a more traditional thing to do, but it's not worth
+ // the complexity.
+ if (S_ISCHR(statbuf.st_mode) && isatty(FD))
+ return 0;
+ // Return the preferred block size.
+ return statbuf.st_blksize;
+#else
+ return raw_ostream::preferred_buffer_size();
+#endif
+}
+
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+ bool bg) {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode =
+ (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+ : sys::Process::OutputColor(colors, bold, bg);
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // Don't count color codes towards output characters.
+ pos -= len;
+ }
+ return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+ if (sys::Process::ColorNeedsFlush())
+ flush();
+ const char *colorcode = sys::Process::ResetColor();
+ if (colorcode) {
+ size_t len = strlen(colorcode);
+ write(colorcode, len);
+ // Don't count color codes towards output characters.
+ pos -= len;
+ }
+ return *this;
+}
+
+bool raw_fd_ostream::is_displayed() const {
+ return sys::Process::FileDescriptorIsDisplayed(FD);
+}
+
+//===----------------------------------------------------------------------===//
+// outs(), errs(), nulls()
+//===----------------------------------------------------------------------===//
+
+/// outs() - This returns a reference to a raw_ostream for standard output.
+/// Use it like: outs() << "foo" << "bar";
+raw_ostream &llvm::outs() {
+ // Set buffer settings to model stdout behavior.
+ // Delete the file descriptor when the program exits, forcing error
+ // detection. If you don't want this behavior, don't use outs().
+ static raw_fd_ostream S(STDOUT_FILENO, true);
+ return S;
+}
+
+/// errs() - This returns a reference to a raw_ostream for standard error.
+/// Use it like: errs() << "foo" << "bar";
+raw_ostream &llvm::errs() {
+ // Set standard error to be unbuffered by default.
+ static raw_fd_ostream S(STDERR_FILENO, false, true);
+ return S;
+}
+
+/// nulls() - This returns a reference to a raw_ostream which discards output.
+raw_ostream &llvm::nulls() {
+ static raw_null_ostream S;
+ return S;
+}
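+
+// Example use (illustrative): outs() is buffered like stdout, errs() is
+// unbuffered so diagnostics appear immediately, and nulls() swallows
+// everything it is given:
+//   outs() << "result: " << 42 << '\n';
+//   errs() << "warning: something looks off\n";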
+
+
+//===----------------------------------------------------------------------===//
+// raw_string_ostream
+//===----------------------------------------------------------------------===//
+
+raw_string_ostream::~raw_string_ostream() {
+ flush();
+}
+
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.append(Ptr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+// raw_svector_ostream
+//===----------------------------------------------------------------------===//
+
+// The raw_svector_ostream implementation uses the SmallVector itself as the
+// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
+// always pointing past the end of the vector, but within the vector
+// capacity. This allows raw_ostream to write directly into the correct place,
+// and we only need to set the vector size when the data is flushed.
+
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ // Set up the initial external buffer. We make sure that the buffer has at
+ // least 128 bytes free; raw_ostream itself only requires 64, but we want to
+ // make sure that we don't grow the buffer unnecessarily on destruction (when
+ // the data is flushed). See the FIXME below.
+ OS.reserve(OS.size() + 128);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+raw_svector_ostream::~raw_svector_ostream() {
+ // FIXME: Prevent resizing during this flush().
+ flush();
+}
+
+/// resync - This is called when the SmallVector we're appending to is changed
+/// outside of the raw_svector_ostream's control. It is only safe to do this
+/// if the raw_svector_ostream has previously been flushed.
+void raw_svector_ostream::resync() {
+ assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector");
+
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+ // If we're writing bytes from the end of the buffer into the smallvector, we
+ // don't need to copy the bytes, just commit the bytes because they are
+ // already in the right place.
+ if (Ptr == OS.end()) {
+ assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!");
+ OS.set_size(OS.size() + Size);
+ } else {
+ assert(GetNumBytesInBuffer() == 0 &&
+ "Should be writing from buffer if some bytes in it");
+ // Otherwise, do copy the bytes.
+ OS.append(Ptr, Ptr+Size);
+ }
+
+ // Grow the vector if necessary.
+ if (OS.capacity() - OS.size() < 64)
+ OS.reserve(OS.capacity() * 2);
+
+ // Update the buffer position.
+ SetBuffer(OS.end(), OS.capacity() - OS.size());
+}
+
+uint64_t raw_svector_ostream::current_pos() const {
+ return OS.size();
+}
+
+StringRef raw_svector_ostream::str() {
+ flush();
+ return StringRef(OS.begin(), OS.size());
+}
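+
+// Example use (an illustrative sketch of the usual idiom):
+//   SmallString<128> Buf;
+//   raw_svector_ostream OS(Buf);
+//   OS << "block." << 7;
+//   StringRef Name = OS.str();  // flushes; Name points into Buf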
+
+//===----------------------------------------------------------------------===//
+// raw_null_ostream
+//===----------------------------------------------------------------------===//
+
+raw_null_ostream::~raw_null_ostream() {
+#ifndef NDEBUG
+ // ~raw_ostream asserts that the buffer is empty. This isn't necessary
+ // with raw_null_ostream, but it's better to have raw_null_ostream follow
+ // the rules than to change the rules just for raw_null_ostream.
+ flush();
+#endif
+}
+
+void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
+}
+
+uint64_t raw_null_ostream::current_pos() const {
+ return 0;
+}
diff --git a/contrib/llvm/lib/Support/regcclass.h b/contrib/llvm/lib/Support/regcclass.h
new file mode 100644
index 000000000000..2cea3e4e5406
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcclass.h
@@ -0,0 +1,70 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-class table */
+static struct cclass {
+ const char *name;
+ const char *chars;
+ const char *multis;
+} cclasses[] = {
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", ""} ,
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "blank", " \t", ""} ,
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
+ { "digit", "0123456789", ""} ,
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "lower", "abcdefghijklmnopqrstuvwxyz",
+ ""} ,
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ ""} ,
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""} ,
+ { "space", "\t\n\v\f\r ", ""} ,
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ ""} ,
+ { "xdigit", "0123456789ABCDEFabcdef",
+ ""} ,
+ { NULL, 0, "" }
+};
diff --git a/contrib/llvm/lib/Support/regcname.h b/contrib/llvm/lib/Support/regcname.h
new file mode 100644
index 000000000000..3c0bb248ffa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcname.h
@@ -0,0 +1,139 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* character-name table */
+static struct cname {
+ const char *name;
+ char code;
+} cnames[] = {
+ { "NUL", '\0' },
+ { "SOH", '\001' },
+ { "STX", '\002' },
+ { "ETX", '\003' },
+ { "EOT", '\004' },
+ { "ENQ", '\005' },
+ { "ACK", '\006' },
+ { "BEL", '\007' },
+ { "alert", '\007' },
+ { "BS", '\010' },
+ { "backspace", '\b' },
+ { "HT", '\011' },
+ { "tab", '\t' },
+ { "LF", '\012' },
+ { "newline", '\n' },
+ { "VT", '\013' },
+ { "vertical-tab", '\v' },
+ { "FF", '\014' },
+ { "form-feed", '\f' },
+ { "CR", '\015' },
+ { "carriage-return", '\r' },
+ { "SO", '\016' },
+ { "SI", '\017' },
+ { "DLE", '\020' },
+ { "DC1", '\021' },
+ { "DC2", '\022' },
+ { "DC3", '\023' },
+ { "DC4", '\024' },
+ { "NAK", '\025' },
+ { "SYN", '\026' },
+ { "ETB", '\027' },
+ { "CAN", '\030' },
+ { "EM", '\031' },
+ { "SUB", '\032' },
+ { "ESC", '\033' },
+ { "IS4", '\034' },
+ { "FS", '\034' },
+ { "IS3", '\035' },
+ { "GS", '\035' },
+ { "IS2", '\036' },
+ { "RS", '\036' },
+ { "IS1", '\037' },
+ { "US", '\037' },
+ { "space", ' ' },
+ { "exclamation-mark", '!' },
+ { "quotation-mark", '"' },
+ { "number-sign", '#' },
+ { "dollar-sign", '$' },
+ { "percent-sign", '%' },
+ { "ampersand", '&' },
+ { "apostrophe", '\'' },
+ { "left-parenthesis", '(' },
+ { "right-parenthesis", ')' },
+ { "asterisk", '*' },
+ { "plus-sign", '+' },
+ { "comma", ',' },
+ { "hyphen", '-' },
+ { "hyphen-minus", '-' },
+ { "period", '.' },
+ { "full-stop", '.' },
+ { "slash", '/' },
+ { "solidus", '/' },
+ { "zero", '0' },
+ { "one", '1' },
+ { "two", '2' },
+ { "three", '3' },
+ { "four", '4' },
+ { "five", '5' },
+ { "six", '6' },
+ { "seven", '7' },
+ { "eight", '8' },
+ { "nine", '9' },
+ { "colon", ':' },
+ { "semicolon", ';' },
+ { "less-than-sign", '<' },
+ { "equals-sign", '=' },
+ { "greater-than-sign", '>' },
+ { "question-mark", '?' },
+ { "commercial-at", '@' },
+ { "left-square-bracket", '[' },
+ { "backslash", '\\' },
+ { "reverse-solidus", '\\' },
+ { "right-square-bracket", ']' },
+ { "circumflex", '^' },
+ { "circumflex-accent", '^' },
+ { "underscore", '_' },
+ { "low-line", '_' },
+ { "grave-accent", '`' },
+ { "left-brace", '{' },
+ { "left-curly-bracket", '{' },
+ { "vertical-line", '|' },
+ { "right-brace", '}' },
+ { "right-curly-bracket", '}' },
+ { "tilde", '~' },
+ { "DEL", '\177' },
+ { NULL, 0 }
+};
diff --git a/contrib/llvm/lib/Support/regcomp.c b/contrib/llvm/lib/Support/regcomp.c
new file mode 100644
index 000000000000..46c91a9c497c
--- /dev/null
+++ b/contrib/llvm/lib/Support/regcomp.c
@@ -0,0 +1,1525 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+#include "regcclass.h"
+#include "regcname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+ char *next; /* next character in RE */
+ char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+# define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
+};
+
+static void p_ere(struct parse *, int);
+static void p_ere_exp(struct parse *);
+static void p_str(struct parse *);
+static void p_bre(struct parse *, int, int);
+static int p_simp_re(struct parse *, int);
+static int p_count(struct parse *);
+static void p_bracket(struct parse *);
+static void p_b_term(struct parse *, cset *);
+static void p_b_cclass(struct parse *, cset *);
+static void p_b_eclass(struct parse *, cset *);
+static char p_b_symbol(struct parse *);
+static char p_b_coll_elem(struct parse *, int);
+static char othercase(int);
+static void bothcases(struct parse *, int);
+static void ordinary(struct parse *, int);
+static void nonnewline(struct parse *);
+static void repeat(struct parse *, sopno, int, int);
+static int seterr(struct parse *, int);
+static cset *allocset(struct parse *);
+static void freeset(struct parse *, cset *);
+static int freezeset(struct parse *, cset *);
+static int firstch(struct parse *, cset *);
+static int nch(struct parse *, cset *);
+static void mcadd(struct parse *, cset *, const char *);
+static void mcinvert(struct parse *, cset *);
+static void mccase(struct parse *, cset *);
+static int isinsets(struct re_guts *, int);
+static int samesets(struct re_guts *, int, int);
+static void categorize(struct parse *, struct re_guts *);
+static sopno dupl(struct parse *, sopno, sopno);
+static void doemit(struct parse *, sop, size_t);
+static void doinsert(struct parse *, sop, size_t, sopno);
+static void dofwd(struct parse *, sopno, sop);
+static void enlarge(struct parse *, sopno);
+static void stripsnug(struct parse *, struct re_guts *);
+static void findmust(struct parse *, struct re_guts *);
+static sopno pluscount(struct parse *, struct re_guts *);
+
+static char nuls[10]; /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE: these know that the parse structure is named `p' !!!
+ */
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next+1))
+#define MORE() (p->next < p->end)
+#define MORE2() (p->next+1 < p->end)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) (void)((co) || SETERROR(e))
+#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
+
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+
+#ifndef NDEBUG
+static int never = 0; /* for use in asserts; shuts lint up */
+#else
+#define never 0 /* some <assert.h>s have bugs too */
+#endif
+
+/*
+ - llvm_regcomp - interface for parser and compilation
+ */
+int /* 0 success, otherwise REG_something */
+llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
+{
+ struct parse pa;
+ struct re_guts *g;
+ struct parse *p = &pa;
+ int i;
+ size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
+
+ cflags = GOODFLAGS(cflags);
+ if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
+ return(REG_INVARG);
+
+ if (cflags&REG_PEND) {
+ if (preg->re_endp < pattern)
+ return(REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((const char *)pattern);
+
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+ (NC-1)*sizeof(cat_t));
+ if (g == NULL)
+ return(REG_ESPACE);
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
+ p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free((char *)g);
+ return(REG_ESPACE);
+ }
+
+ /* set things up */
+ p->g = g;
+ p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
+ }
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->ncategories = 1; /* category 0 is "everything else" */
+ g->categories = &g->catspace[-(CHAR_MIN)];
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+ g->backrefs = 0;
+
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags&REG_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags&REG_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ categorize(p, g);
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+ /* not debugging, so can't rely on the assert() in llvm_regexec() */
+ if (g->iflags&REGEX_BAD)
+ SETERROR(REG_ASSERT);
+#endif
+
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ llvm_regfree(preg);
+ return(p->error);
+}
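+
+/*
+ * Example use (an illustrative sketch; llvm_regexec and llvm_regfree are
+ * the companion entry points of this package):
+ *    llvm_regex_t re;
+ *    if (llvm_regcomp(&re, "foo(bar|baz)", REG_EXTENDED) != 0)
+ *        ... report the error ...
+ *    ... match with llvm_regexec(), then release with llvm_regfree(&re) ...
+ */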
+
+/*
+ - p_ere - ERE parser top level, concatenation and alternation
+ */
+static void
+p_ere(struct parse *p, int stop) /* character this ERE should end at */
+{
+ char c;
+ sopno prevback = 0;
+ sopno prevfwd = 0;
+ sopno conc;
+ int first = 1; /* is this the first alternative? */
+
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
+
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
+ }
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
+ }
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
+}
+
+/*
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ */
+static void
+p_ere_exp(struct parse *p)
+{
+ char c;
+ sopno pos;
+ int count;
+ int count2;
+ sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ MUSTEAT(')', REG_EPAREN);
+ break;
+#ifndef POSIX_MISTAKE
+ case ')': /* happens only if no current unmatched ( */
+ /*
+ * You may ask, why the ifndef? Because I didn't notice
+ * this until slightly too late for 1003.2, and none of the
+ * other 1003.2 regular-expression reviewers noticed it at
+ * all. So an unmatched ) is legal POSIX, at least until
+ * we can get it fixed.
+ */
+ SETERROR(REG_EPAREN);
+ break;
+#endif
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ ordinary(p, c);
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+ return;
+ SETERROR(REG_BADRPT);
+}
+
+/*
+ - p_str - string (no metacharacters) "parser"
+ */
+static void
+p_str(struct parse *p)
+{
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
+}
+
+/*
+ - p_bre - BRE parser top level, anchoring and concatenation
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor. The
+ * only undesirable side effect is that '$' gets included as a character
+ * category in such cases. This is fairly harmless; not worth fixing.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_bre(struct parse *p,
+ int end1, /* first terminating character */
+ int end2) /* second terminating character */
+{
+ sopno start = HERE();
+ int first = 1; /* first subexpression? */
+ int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+}
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p,
+ int starordinary) /* is a leading * an ordinary character? */
+{
+ int c;
+ int count;
+ int count2;
+ sopno pos;
+ int i;
+ sopno subno;
+# define BACKSL (1<<CHAR_BIT)
+
+ pos = HERE(); /* repetition op, if any, covers from here */
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+ if (c == '\\') {
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = BACKSL | GETNEXT();
+ }
+ switch (c) {
+ case '.':
+ if (p->g->cflags&REG_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL|'{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL|'(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL|')': /* should not get here -- must be user */
+ case BACKSL|'}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL|'1':
+ case BACKSL|'2':
+ case BACKSL|'3':
+ case BACKSL|'4':
+ case BACKSL|'5':
+ case BACKSL|'6':
+ case BACKSL|'7':
+ case BACKSL|'8':
+ case BACKSL|'9':
+ i = (c&~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, (char)c);
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit((uch)PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == '$') /* $ (but not \$) ends it */
+ return(1);
+
+ return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int /* the value */
+p_count(struct parse *p)
+{
+ int count = 0;
+ int ndigits = 0;
+
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ count = count*10 + (GETNEXT() - '0');
+ ndigits++;
+ }
+
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code: if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+ cset *cs;
+ int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+
+ if ((cs = allocset(p)) == NULL) {
+ /* allocset did set error status in p */
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ MUSTEAT(']', REG_EBRACK);
+
+ if (p->error != 0) { /* don't mess things up further */
+ freeset(p, cs);
+ return;
+ }
+
+ if (p->g->cflags&REG_ICASE) {
+ int i;
+ int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ if (cs->multis != NULL)
+ mccase(p, cs);
+ }
+ if (invert) {
+ int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags&REG_NEWLINE)
+ CHsub(cs, '\n');
+ if (cs->multis != NULL)
+ mcinvert(p, cs);
+ }
+
+ assert(cs->multis == NULL); /* xxx */
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+ char c;
+ char start, finish;
+ int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+/* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+ char *sp = p->next;
+ struct cclass *cp;
+ size_t len;
+ const char *u;
+ char c;
+
+ while (MORE() && isalpha((uch)PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+ MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+ char c;
+
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+ char value;
+
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return(GETNEXT());
+
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char /* value of collating element */
+p_b_coll_elem(struct parse *p,
+ int endc) /* name ended by endc,']' */
+{
+ char *sp = p->next;
+ struct cname *cp;
+ int len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return(0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ return(cp->code); /* known name */
+ if (len == 1)
+ return(*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char /* if no counterpart, return ch */
+othercase(int ch)
+{
+ ch = (uch)ch;
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return ((uch)tolower(ch));
+ else if (islower(ch))
+ return ((uch)toupper(ch));
+ else /* peculiar, but could happen */
+ return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(struct parse *p, int ch)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[3];
+
+ ch = (uch)ch;
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket+2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+2);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+ cat_t *cap = p->g->categories;
+
+ if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else {
+ EMIT(OCHAR, (uch)ch);
+ if (cap[ch] == 0)
+ cap[ch] = p->g->ncategories++;
+ }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+nonnewline(struct parse *p)
+{
+ char *oldnext = p->next;
+ char *oldend = p->end;
+ char bracket[4];
+
+ p->next = bracket;
+ p->end = bracket+3;
+ bracket[0] = '^';
+ bracket[1] = '\n';
+ bracket[2] = ']';
+ bracket[3] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+3);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ */
+static void
+repeat(struct parse *p,
+ sopno start, /* operand from here to end of strip */
+ int from, /* repeated from this number */
+ int to) /* to this number of times (maybe INFINITY) */
+{
+ sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+ sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish-start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start+1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start+1, finish+1);
+ assert(copy == finish+4);
+ repeat(p, copy, 1, to-1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to-1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
+}
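+
+/*
+ * Worked example of the REP/MAP encoding above: MAP(0) = 0, MAP(1) = 1,
+ * MAP(3) = N and MAP(INFINITY) = INF, so x{1,3} selects REP(1, N) and is
+ * rewritten as x?x{1,2}; the recursive call then reduces x{1,2} the same
+ * way until the trivial REP(1, 1) case is reached.
+ */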
+
+/*
+ - seterr - set an error condition
+ */
+static int /* useless but makes type checking happy */
+seterr(struct parse *p, int e)
+{
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
+ return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ */
+static cset *
+allocset(struct parse *p)
+{
+ int no = p->g->ncsets++;
+ size_t nc;
+ size_t nbytes;
+ cset *cs;
+ size_t css = (size_t)p->g->csetsize;
+ int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ void *ptr;
+
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+
+ ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
+ if (ptr == NULL)
+ goto nomem;
+ p->g->sets = ptr;
+
+ ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
+ if (ptr == NULL)
+ goto nomem;
+ p->g->setbits = ptr;
+
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+
+ (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ }
+ /* XXX should not happen */
+ if (p->g->sets == NULL || p->g->setbits == NULL)
+ goto nomem;
+
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+ cs->smultis = 0;
+ cs->multis = NULL;
+
+ return(cs);
+nomem:
+ free(p->g->sets);
+ p->g->sets = NULL;
+ free(p->g->setbits);
+ p->g->setbits = NULL;
+
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return(NULL);
+}
+
+/*
+ - freeset - free a now-unused set
+ */
+static void
+freeset(struct parse *p, cset *cs)
+{
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top-1) /* recover only the easy case */
+ p->g->ncsets--;
+}
+
+/*
+ - freezeset - final processing on a set of characters
+ *
+ * The main task here is merging identical sets. This is usually a waste
+ * of time (although the hash code minimizes the overhead), but can win
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
+ * the same value!
+ */
+static int /* set number */
+freezeset(struct parse *p, cset *cs)
+{
+ uch h = cs->hash;
+ size_t i;
+ cset *top = &p->g->sets[p->g->ncsets];
+ cset *cs2;
+ size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return((int)(cs - p->g->sets));
+}
+
+/*
+ - firstch - return first character in a set (which must have at least one)
+ */
+static int /* character; there is no "none" value */
+firstch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return((char)i);
+ assert(never);
+ return(0); /* arbitrary */
+}
+
+/*
+ - nch - number of characters in a set
+ */
+static int
+nch(struct parse *p, cset *cs)
+{
+ size_t i;
+ size_t css = (size_t)p->g->csetsize;
+ int n = 0;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return(n);
+}
+
+/*
+ - mcadd - add a collating element to a cset
+ */
+static void
+mcadd( struct parse *p, cset *cs, const char *cp)
+{
+ size_t oldend = cs->smultis;
+ void *np;
+
+ cs->smultis += strlen(cp) + 1;
+ np = realloc(cs->multis, cs->smultis);
+ if (np == NULL) {
+ if (cs->multis)
+ free(cs->multis);
+ cs->multis = NULL;
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ cs->multis = np;
+
+ llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
+}
+
+/*
+ - mcinvert - invert the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mcinvert(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - mccase - add case counterparts of the list of collating elements in a cset
+ *
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
+ */
+/* ARGSUSED */
+static void
+mccase(struct parse *p, cset *cs)
+{
+ assert(cs->multis == NULL); /* xxx */
+}
+
+/*
+ - isinsets - is this character in any sets?
+ */
+static int /* predicate */
+isinsets(struct re_guts *g, int c)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc = (uch)c;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc] != 0)
+ return(1);
+ return(0);
+}
+
+/*
+ - samesets - are these two characters in exactly the same sets?
+ */
+static int /* predicate */
+samesets(struct re_guts *g, int c1, int c2)
+{
+ uch *col;
+ int i;
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ unsigned uc1 = (uch)c1;
+ unsigned uc2 = (uch)c2;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc1] != col[uc2])
+ return(0);
+ return(1);
+}
+
+/*
+ - categorize - sort out character categories
+ */
+static void
+categorize(struct parse *p, struct re_guts *g)
+{
+ cat_t *cats = g->categories;
+ int c;
+ int c2;
+ cat_t cat;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (cats[c] == 0 && isinsets(g, c)) {
+ cat = g->ncategories++;
+ cats[c] = cat;
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++)
+ if (cats[c2] == 0 && samesets(g, c, c2))
+ cats[c2] = cat;
+ }
+}
+
+/*
+ - dupl - emit a duplicate of a bunch of sops
+ */
+static sopno /* start of duplicate */
+dupl(struct parse *p,
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
+{
+ sopno ret = HERE();
+ sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return(ret);
+ enlarge(p, p->ssize + len); /* this many unexpected additions */
+ assert(p->ssize >= p->slen + len);
+ (void) memmove((char *)(p->strip + p->slen),
+ (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ p->slen += len;
+ return(ret);
+}
+
+/*
+ - doemit - emit a strip operator
+ *
+ * It might seem better to implement this as a macro with a function as
+ * hard-case backup, but it's just too big and messy unless there are
+ * some changes to the data structures. Maybe later.
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1<<OPSHIFT);
+
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+ assert(p->slen < p->ssize);
+
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+ sopno sn;
+ sop s;
+ int i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ assert(HERE() == sn+1);
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+ (HERE()-pos-1)*sizeof(sop));
+ p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ assert(value < 1<<OPSHIFT);
+ p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+ sop *sp;
+
+ if (p->ssize >= size)
+ return;
+
+ sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
+ }
+ p->strip = sp;
+ p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+ g->nstates = p->slen;
+ g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences. Someday. This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
+ */
+static void
+findmust(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop *start = 0; /* start of longest sequence; set in the default case below */
+ sop *newstart = 0; /* start of current sequence; set in the OCHAR case */
+ sopno newlen;
+ sop s;
+ char *cp;
+ sopno i;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes with debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= REGEX_BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
+ }
+ } while (OP(s) != OEND);
+
+ if (g->mlen == 0) /* there isn't one */
+ return;
+
+ /* turn it into a character string */
+ g->must = malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
+ return;
+ }
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp++ = '\0'; /* just on general principles */
+}
+
+/*
+ - pluscount - count + nesting
+ */
+static sopno /* nesting depth */
+pluscount(struct parse *p, struct re_guts *g)
+{
+ sop *scan;
+ sop s;
+ sopno plusnest = 0;
+ sopno maxnest = 0;
+
+ if (p->error != 0)
+ return(0); /* there may not be an OEND */
+
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
+ }
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= REGEX_BAD;
+ return(maxnest);
+}
diff --git a/contrib/llvm/lib/Support/regengine.inc b/contrib/llvm/lib/Support/regengine.inc
new file mode 100644
index 000000000000..7e41f96f359d
--- /dev/null
+++ b/contrib/llvm/lib/Support/regengine.inc
@@ -0,0 +1,1034 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+static int matcher(struct re_guts *, const char *, size_t,
+ llvm_regmatch_t[], int);
+static const char *dissect(struct match *, const char *, const char *, sopno,
+ sopno);
+static const char *backref(struct match *, const char *, const char *, sopno,
+ sopno, sopno, int);
+static const char *fast(struct match *, const char *, const char *, sopno, sopno);
+static const char *slow(struct match *, const char *, const char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[],
+ int eflags)
+{
+ const char *endp;
+ size_t i;
+ struct match mv;
+ struct match *m = &mv;
+ const char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ const char *start;
+ const char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags&REG_NOSUB)
+ nmatch = 0;
+ if (eflags&REG_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ free(m->pmatch);
+ free((void*)m->lastpos);
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(llvm_regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (const char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ if (m->coldp == stop)
+ break;
+ start = m->coldp + 1; /* recycle starting later */
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
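The prescreening loop near the top of matcher() is the cheap filter that makes this engine tolerable: when findmust() recovered a mandatory literal, a plain substring scan can reject most subjects before the NFA simulation ever runs. A self-contained sketch of that idea follows; the names are illustrative, not the real re_guts fields.

    #include <stdio.h>
    #include <string.h>

    /* Reject a subject early if the mandatory literal is absent. */
    static int prescreen(const char *start, const char *stop,
                         const char *must, size_t mlen) {
        const char *dp;

        if (must == NULL)                  /* nothing mandatory: cannot reject */
            return 1;
        for (dp = start; dp + mlen <= stop; dp++)
            if (*dp == must[0] && memcmp(dp, must, mlen) == 0)
                return 1;                  /* literal present: worth matching */
        return 0;                          /* literal absent: guaranteed no match */
    }

    int main(void) {
        const char *s = "why hello there";

        printf("%d\n", prescreen(s, s + strlen(s), "hello", 5));  /* prints 1 */
        printf("%d\n", prescreen(s, s + strlen(s), "xyzzy", 5));  /* prints 0 */
        return 0;
    }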
+
+/*
+ - dissect - figure out what matched what, no back references
+ */
+static const char * /* == stop (success) always */
+dissect(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+
+ AT("diss", start, stop, startst, stopst);
+ sp = start;
+ for (ss = startst; ss < stopst; ss = es) {
+ /* identify end of subRE */
+ es = ss;
+ switch (OP(m->g->strip[es])) {
+ case OPLUS_:
+ case OQUEST_:
+ es += OPND(m->g->strip[es]);
+ break;
+ case OCH_:
+ while (OP(m->g->strip[es]) != O_CH)
+ es += OPND(m->g->strip[es]);
+ break;
+ }
+ es++;
+
+ /* figure out what it matched */
+ switch (OP(m->g->strip[ss])) {
+ case OEND:
+ assert(nope);
+ break;
+ case OCHAR:
+ sp++;
+ break;
+ case OBOL:
+ case OEOL:
+ case OBOW:
+ case OEOW:
+ break;
+ case OANY:
+ case OANYOF:
+ sp++;
+ break;
+ case OBACK_:
+ case O_BACK:
+ assert(nope);
+ break;
+ /* cases where length of match is hard to find */
+ case OQUEST_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ /* did innards match? */
+ if (slow(m, sp, rest, ssub, esub) != NULL) {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ } else /* no */
+ assert(sp == rest);
+ sp = rest;
+ break;
+ case OPLUS_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ ssp = sp;
+ oldssp = ssp;
+ for (;;) { /* find last match of innards */
+ sep = slow(m, ssp, rest, ssub, esub);
+ if (sep == NULL || sep == ssp)
+ break; /* failed or matched null */
+ oldssp = ssp; /* on to next try */
+ ssp = sep;
+ }
+ if (sep == NULL) {
+ /* last successful match */
+ sep = ssp;
+ ssp = oldssp;
+ }
+ assert(sep == rest); /* must exhaust substring */
+ assert(slow(m, ssp, sep, ssub, esub) == rest);
+ {
+ const char *dp = dissect(m, ssp, sep, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == sep);
+ }
+ sp = rest;
+ break;
+ case OCH_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = ss + OPND(m->g->strip[ss]) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ if (slow(m, sp, rest, ssub, esub) == rest)
+ break; /* it matched all of it */
+ /* that one missed, try next one */
+ assert(OP(m->g->strip[esub]) == OOR1);
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ {
+ const char *dp = dissect(m, sp, rest, ssub, esub);
+ (void)dp; /* avoid warning if assertions off */
+ assert(dp == rest);
+ }
+ sp = rest;
+ break;
+ case O_PLUS:
+ case O_QUEST:
+ case OOR1:
+ case OOR2:
+ case O_CH:
+ assert(nope);
+ break;
+ case OLPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_so = sp - m->offp;
+ break;
+ case ORPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_eo = sp - m->offp;
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+ }
+
+ assert(sp == stop);
+ return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ */
+static const char * /* == stop (success) or NULL (failure) */
+backref(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst, sopno lev, int rec) /* PLUS nesting level */
+{
+ int i;
+ sopno ss; /* start sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ llvm_regoff_t offsave;
+ cset *cs;
+
+ AT("back", start, stop, startst, stopst);
+ sp = start;
+
+ /* get as far as we can with easy stuff */
+ hard = 0;
+ for (ss = startst; !hard && ss < stopst; ss++)
+ switch (OP(s = m->g->strip[ss])) {
+ case OCHAR:
+ if (sp == stop || *sp++ != (char)OPND(s))
+ return(NULL);
+ break;
+ case OANY:
+ if (sp == stop)
+ return(NULL);
+ sp++;
+ break;
+ case OANYOF:
+ cs = &m->g->sets[OPND(s)];
+ if (sp == stop || !CHIN(cs, *sp++))
+ return(NULL);
+ break;
+ case OBOL:
+ if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOL:
+ if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OBOW:
+ if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp > m->beginp &&
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOW:
+ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags&REG_NEWLINE)) ||
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case O_QUEST:
+ break;
+ case OOR1: /* matches null but needs to skip */
+ ss++;
+ s = m->g->strip[ss];
+ do {
+ assert(OP(s) == OOR2);
+ ss += OPND(s);
+ } while (OP(s = m->g->strip[ss]) != O_CH);
+ /* note that the ss++ gets us past the O_CH */
+ break;
+ default: /* have to make a choice */
+ hard = 1;
+ break;
+ }
+ if (!hard) { /* that was it! */
+ if (sp != stop)
+ return(NULL);
+ return(sp);
+ }
+ ss--; /* adjust for the for's final increment */
+
+ /* the hard stuff */
+ AT("hard", sp, stop, ss, stopst);
+ s = m->g->strip[ss];
+ switch (OP(s)) {
+ case OBACK_: /* the vilest depths */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ if (m->pmatch[i].rm_eo == -1)
+ return(NULL);
+ assert(m->pmatch[i].rm_so != -1);
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
+ assert(stop - m->beginp >= len);
+ if (sp > stop - len)
+ return(NULL); /* not enough left to match */
+ ssp = m->offp + m->pmatch[i].rm_so;
+ if (memcmp(sp, ssp, len) != 0)
+ return(NULL);
+ while (m->g->strip[ss] != SOP(O_BACK, i))
+ ss++;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+ break;
+ case OQUEST_: /* to null or not */
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp); /* not */
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+ break;
+ case OPLUS_:
+ assert(m->lastpos != NULL);
+ assert(lev+1 <= m->g->nplus);
+ m->lastpos[lev+1] = sp;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+ break;
+ case O_PLUS:
+ if (sp == m->lastpos[lev]) /* last pass matched null */
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ /* try another pass */
+ m->lastpos[lev] = sp;
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ if (dp == NULL)
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ else
+ return(dp);
+ break;
+ case OCH_: /* find the right one, if any */
+ ssub = ss + 1;
+ esub = ss + OPND(s) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ /* that one missed, try next one */
+ if (OP(m->g->strip[esub]) == O_CH)
+ return(NULL); /* there is none */
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ break;
+ case OLPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_so;
+ m->pmatch[i].rm_so = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_so = offsave;
+ return(NULL);
+ break;
+ case ORPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_eo;
+ m->pmatch[i].rm_eo = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_eo = offsave;
+ return(NULL);
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+
+ /* "can't happen" */
+ assert(nope);
+ /* NOTREACHED */
+ return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static const char * /* where tentative match ended, or NULL */
+fast(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *coldp; /* last p after which no match was underway */
+
+ CLEAR(st);
+ SET1(st, startst);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ ASSIGN(fresh, st);
+ SP("start", st, *p);
+ coldp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+ if (EQ(st, fresh))
+ coldp = p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, fresh);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("aft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ assert(coldp != NULL);
+ m->coldp = coldp;
+ if (ISSET(st, stopst))
+ return(p+1);
+ else
+ return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static const char * /* where it ended */
+slow(struct match *m, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ const char *p = start;
+ int c = (start == m->beginp) ? OUT : *(start-1);
+ int lastc; /* previous c */
+ int flagch;
+ int i;
+ const char *matchp; /* last p at which a match ended */
+
+ AT("slow", start, stop, startst, stopst);
+ CLEAR(st);
+ SET1(st, startst);
+ SP("sstart", st, *p);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ matchp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst))
+ matchp = p;
+ if (EQ(st, empty) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, empty);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("saft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ */
+static states
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ int ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
+{
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
+
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+ s = g->strip[pc];
+ switch (OP(s)) {
+ case OEND:
+ assert(pc == stop-1);
+ break;
+ case OCHAR:
+ /* only characters can match */
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOL:
+ if (ch == BOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OEOL:
+ if (ch == EOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OBOW:
+ if (ch == BOW)
+ FWD(aft, bef, 1);
+ break;
+ case OEOW:
+ if (ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case OANY:
+ if (!NONCHAR(ch))
+ FWD(aft, bef, 1);
+ break;
+ case OANYOF:
+ cs = &g->sets[OPND(s)];
+ if (!NONCHAR(ch) && CHIN(cs, ch))
+ FWD(aft, bef, 1);
+ break;
+ case OBACK_: /* ignored here */
+ case O_BACK:
+ FWD(aft, aft, 1);
+ break;
+ case OPLUS_: /* forward, this is just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case O_PLUS: /* both forward and back */
+ FWD(aft, aft, 1);
+ i = ISSETBACK(aft, OPND(s));
+ BACK(aft, aft, OPND(s));
+ if (!i && ISSETBACK(aft, OPND(s))) {
+ /* oho, must reconsider loop body */
+ pc -= OPND(s) + 1;
+ INIT(here, pc);
+ }
+ break;
+ case OQUEST_: /* two branches, both forward */
+ FWD(aft, aft, 1);
+ FWD(aft, aft, OPND(s));
+ break;
+ case O_QUEST: /* just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case OLPAREN: /* not significant here */
+ case ORPAREN:
+ FWD(aft, aft, 1);
+ break;
+ case OCH_: /* mark the first two branches */
+ FWD(aft, aft, 1);
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ break;
+ case OOR1: /* done a branch, find the O_CH */
+ if (ISSTATEIN(aft, here)) {
+ for (look = 1;
+ OP(s = g->strip[pc+look]) != O_CH;
+ look += OPND(s))
+ assert(OP(s) == OOR2);
+ FWD(aft, aft, look);
+ }
+ break;
+ case OOR2: /* propagate OCH_'s marking */
+ FWD(aft, aft, 1);
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ }
+ break;
+ case O_CH: /* just empty */
+ FWD(aft, aft, 1);
+ break;
+ default: /* ooooops... */
+ assert(nope);
+ break;
+ }
+ }
+
+ return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
+
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)fprintf(d, "%s", caption);
+ if (ch != '\0')
+ (void)fprintf(d, " %s", pchar(ch));
+ for (i = 0; i < g->nstates; i++)
+ if (ISSET(st, i)) {
+ (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ first = 0;
+ }
+ (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+ sopno stopst)
+{
+ if (!(m->eflags&REG_TRACE))
+ return;
+
+ (void)printf("%s %s-", title, pchar(*start));
+ (void)printf("%s ", pchar(*stop));
+ (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c? Well, yes. But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient. It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(int ch)
+{
+ static char pbuf[10];
+
+ if (isprint(ch) || ch == ' ')
+ (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+ else
+ (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+ return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/contrib/llvm/lib/Support/regerror.c b/contrib/llvm/lib/Support/regerror.c
new file mode 100644
index 000000000000..1d67c9a2b03b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+ struct rerr *r;
+ size_t len;
+ int target = errcode &~ REG_ITOA;
+ const char *s;
+ char convbuf[50];
+
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf, sizeof convbuf);
+ else {
+ for (r = rerrs; r->code != 0; r++)
+ if (r->code == target)
+ break;
+
+ if (errcode&REG_ITOA) {
+ if (r->code != 0) {
+ assert(strlen(r->name) < sizeof(convbuf));
+ (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+ } else
+ (void)snprintf(convbuf, sizeof convbuf,
+ "REG_0x%x", target);
+ s = convbuf;
+ } else
+ s = r->explain;
+ }
+
+ len = strlen(s) + 1;
+ if (errbuf_size > 0) {
+ llvm_strlcpy(errbuf, s, errbuf_size);
+ }
+
+ return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+ struct rerr *r;
+
+ for (r = rerrs; r->code != 0; r++)
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code == 0)
+ return("0");
+
+ (void)snprintf(localbuf, localbufsize, "%d", r->code);
+ return(localbuf);
+}
diff --git a/contrib/llvm/lib/Support/regex2.h b/contrib/llvm/lib/Support/regex2.h
new file mode 100644
index 000000000000..21659c34449a
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex2.h
@@ -0,0 +1,157 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ */
+
+/*
+ * internals of regex_t
+ */
+#define MAGIC1 ((('r'^0200)<<8) | 'e')
+
+/*
+ * The internal representation is a *strip*, a sequence of
+ * operators ending with an endmarker. (Some terminology etc. is a
+ * historical relic of earlier versions which used multiple strips.)
+ * Certain oddities in the representation are there to permit running
+ * the machinery backwards; in particular, any deviation from sequential
+ * flow must be marked at both its source and its destination. Some
+ * fine points:
+ *
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
+ * OOR1 and OOR2 are respectively the end and the beginning of one of
+ * the branches. Note that there is an implicit OOR2 following OCH_
+ * and an implicit OOR1 preceding O_CH.
+ *
+ * In state representations, an operator's bit is on to signify a state
+ * immediately *preceding* "execution" of that operator.
+ */
+typedef unsigned long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1LU<<OPSHIFT) /* endmarker - */
+#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
+#define OBOL (3LU<<OPSHIFT) /* left anchor - */
+#define OEOL (4LU<<OPSHIFT) /* right anchor - */
+#define OANY (5LU<<OPSHIFT) /* . - */
+#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
+#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
+#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
+#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
+#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
+#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
+#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
+#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19LU<<OPSHIFT) /* begin word - */
+#define OEOW (20LU<<OPSHIFT) /* end word - */
+
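The strip encoding above packs an operator and its operand into a single unsigned long: five operator bits sit above a 27-bit operand field that carries a character, a set number, or a forward/backward offset. A small standalone sketch, reusing only the macro definitions shown above, demonstrates the round trip.

    #include <stdio.h>

    typedef unsigned long sop;            /* as in regex2.h */
    #define OPRMASK 0xf8000000LU
    #define OPDMASK 0x07ffffffLU
    #define OPSHIFT ((unsigned)27)
    #define OP(n)   ((n)&OPRMASK)
    #define OPND(n) ((n)&OPDMASK)
    #define SOP(op, opnd) ((op)|(opnd))
    #define OCHAR  (2LU<<OPSHIFT)
    #define OANYOF (6LU<<OPSHIFT)

    int main(void) {
        sop s1 = SOP(OCHAR, 'a');   /* literal character 'a' */
        sop s2 = SOP(OANYOF, 3);    /* bracket expression using cset number 3 */

        printf("s1: op=%#lx opnd=%lu ('%c')\n", OP(s1), OPND(s1), (char)OPND(s1));
        printf("s2: is OANYOF? %d, set number %lu\n", OP(s2) == OANYOF, OPND(s2));
        return 0;
    }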
+/*
+ * Structure for [] character-set representation. Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte. A hash code
+ * simplifies testing whether two sets could be identical.
+ *
+ * This will get trickier for multicharacter collating elements. As
+ * preliminary hooks for dealing with such things, we also carry along
+ * a string of multi-character elements, and decide the size of the
+ * vectors at run time.
+ */
+typedef struct {
+ uch *ptr; /* -> uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+ char *multis; /* -> char[smultis] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
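In the real compiler the cset byte vectors and bit masks are carved out of re_guts.setbits by the set-allocation code in regcomp.c; purely for illustration, the sketch below builds one 256-entry column by hand and exercises CHadd/CHIN. As the comment above warns, CHIN yields the raw masked bit, not 0 or 1.

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    typedef unsigned char uch;                     /* as in regutils.h */
    typedef struct {                               /* as in regex2.h; multis unused here */
        uch *ptr; uch mask; uch hash;
        size_t smultis; char *multis;
    } cset;
    #define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
    #define CHIN(cs, c)  ((cs)->ptr[(uch)(c)] & (cs)->mask)

    int main(void) {
        uch bytes[256];                 /* one byte vector, csetsize of 256 assumed */
        cset cs;

        memset(bytes, 0, sizeof bytes);
        cs.ptr = bytes;
        cs.mask = 01;                   /* first bit column of the vector */
        cs.hash = 0;
        cs.smultis = 0;
        cs.multis = NULL;

        CHadd(&cs, 'a');
        CHadd(&cs, 'b');

        printf("'a' in set: %d\n", CHIN(&cs, 'a') != 0);   /* 1 */
        printf("'z' in set: %d\n", CHIN(&cs, 'z') != 0);   /* 0 */
        return 0;
    }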
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of llvm_regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define REGEX_BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum((c)&0xff) || (c) == '_')
diff --git a/contrib/llvm/lib/Support/regex_impl.h b/contrib/llvm/lib/Support/regex_impl.h
new file mode 100644
index 000000000000..f8296c9ff75e
--- /dev/null
+++ b/contrib/llvm/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+ llvm_regoff_t rm_so; /* start of match */
+ llvm_regoff_t rm_eo; /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+ llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
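Within LLVM these entry points are normally reached through the Regex wrapper class rather than called directly, but a minimal caller of the C API declared above could look like the hypothetical sketch below; the pattern and subject string are purely illustrative.

    #include <stdio.h>
    #include "regex_impl.h"

    int main(void) {
        llvm_regex_t re;
        llvm_regmatch_t m[2];
        char err[128];
        int rc;

        rc = llvm_regcomp(&re, "he(l+)o", REG_EXTENDED);
        if (rc != 0) {
            llvm_regerror(rc, &re, err, sizeof err);
            fprintf(stderr, "regcomp: %s\n", err);
            return 1;
        }

        rc = llvm_regexec(&re, "why hello there", 2, m, 0);
        if (rc == 0)
            printf("match at [%ld,%ld), group 1 at [%ld,%ld)\n",
                   (long)m[0].rm_so, (long)m[0].rm_eo,
                   (long)m[1].rm_so, (long)m[1].rm_eo);
        else if (rc == REG_NOMATCH)
            printf("no match\n");

        llvm_regfree(&re);
        return 0;
    }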
diff --git a/contrib/llvm/lib/Support/regexec.c b/contrib/llvm/lib/Support/regexec.c
new file mode 100644
index 000000000000..007861675ba1
--- /dev/null
+++ b/contrib/llvm/lib/Support/regexec.c
@@ -0,0 +1,162 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes regengine.inc *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+/* FIXME: the small version assumes 'states' fits in a 'long'. */
+#define states1 long /* for later use in llvm_regexec() decision */
+#define states states1
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
+#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
+#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
+/* function names */
+#define SNAMES /* regengine.inc looks after details */
+
+#include "regengine.inc"
+
+/* now undo things */
+#undef states
+#undef CLEAR
+#undef SET0
+#undef SET1
+#undef ISSET
+#undef ASSIGN
+#undef EQ
+#undef STATEVARS
+#undef STATESETUP
+#undef STATETEARDOWN
+#undef SETUP
+#undef onestate
+#undef INIT
+#undef INC
+#undef ISSTATEIN
+#undef FWD
+#undef BACK
+#undef ISSETBACK
+#undef SNAMES
+
+/* macros for manipulating states, large version */
+#define states char *
+#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define SET0(v, n) ((v)[n] = 0)
+#define SET1(v, n) ((v)[n] = 1)
+#define ISSET(v, n) ((v)[n])
+#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
+#define STATEVARS long vn; char *space
+#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
+ if ((m)->space == NULL) return(REG_ESPACE); \
+ (m)->vn = 0; }
+#define STATETEARDOWN(m) { free((m)->space); }
+#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
+#define onestate long
+#define INIT(o, n) ((o) = (n))
+#define INC(o) ((o)++)
+#define ISSTATEIN(v, o) ((v)[o])
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
+#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
+#define ISSETBACK(v, n) ((v)[here - (n)])
+/* function names */
+#define LNAMES /* flag */
+
+#include "regengine.inc"
+
+/*
+ - llvm_regexec - interface for matching
+ *
+ * We put this here so we can exploit knowledge of the state representation
+ * when choosing which matcher to call. Also, by this point the matchers
+ * have been prototyped.
+ */
+int /* 0 success, REG_NOMATCH failure */
+llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
+ llvm_regmatch_t pmatch[], int eflags)
+{
+ struct re_guts *g = preg->re_g;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
+#endif
+
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
+ return(REG_BADPAT);
+ assert(!(g->iflags&REGEX_BAD));
+ if (g->iflags&REGEX_BAD) /* backstop for no-debug case */
+ return(REG_BADPAT);
+ eflags = GOODFLAGS(eflags);
+
+ if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
+ return(smatcher(g, string, nmatch, pmatch, eflags));
+ else
+ return(lmatcher(g, string, nmatch, pmatch, eflags));
+}
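The small/large decision above hinges on whether every NFA position fits into a single long used as a bitmask, i.e. whether nstates is at most CHAR_BIT * sizeof(long). A trivial standalone check of that threshold, with an illustrative state count:

    #include <limits.h>
    #include <stdio.h>

    int main(void) {
        long nstates = 12;     /* e.g. what re_guts.nstates might hold */
        long small_limit = (long)(CHAR_BIT * sizeof(long));

        printf("small (bitmask-in-a-long) matcher handles up to %ld states\n",
               small_limit);
        printf("nstates = %ld -> %s\n", nstates,
               nstates <= small_limit ? "smatcher" : "lmatcher");
        return 0;
    }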
diff --git a/contrib/llvm/lib/Support/regfree.c b/contrib/llvm/lib/Support/regfree.c
new file mode 100644
index 000000000000..dc2b4af90fa7
--- /dev/null
+++ b/contrib/llvm/lib/Support/regfree.c
@@ -0,0 +1,72 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/*
+ - llvm_regfree - free everything
+ */
+void
+llvm_regfree(llvm_regex_t *preg)
+{
+ struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
+ return;
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
+}
diff --git a/contrib/llvm/lib/Support/regstrlcpy.c b/contrib/llvm/lib/Support/regstrlcpy.c
new file mode 100644
index 000000000000..8b68afdf75f1
--- /dev/null
+++ b/contrib/llvm/lib/Support/regstrlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * This code is derived from OpenBSD's libc, original license follows:
+ *
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "regex_impl.h"
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+llvm_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
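Given the return-value contract spelled out above (returns strlen(src); retval >= siz means truncation), a minimal caller-side check looks like the following. Illustrative only; it assumes the llvm_strlcpy prototype comes from regex_impl.h, as in this file.

#include <stddef.h>
#include <stdio.h>
#include "regex_impl.h"    /* assumed home of the llvm_strlcpy prototype */

static void copy_and_report(const char *src) {
  char buf[8];
  size_t needed = llvm_strlcpy(buf, src, sizeof(buf));
  if (needed >= sizeof(buf))           /* retval >= siz: truncation occurred */
    printf("truncated: source needed %zu bytes\n", needed + 1);
  else
    printf("copied \"%s\"\n", buf);
}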
diff --git a/contrib/llvm/lib/Support/regutils.h b/contrib/llvm/lib/Support/regutils.h
new file mode 100644
index 000000000000..d0ee100a382b
--- /dev/null
+++ b/contrib/llvm/lib/Support/regutils.h
@@ -0,0 +1,53 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include <assert.h>
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/contrib/llvm/lib/Support/system_error.cpp b/contrib/llvm/lib/Support/system_error.cpp
new file mode 100644
index 000000000000..56898de31520
--- /dev/null
+++ b/contrib/llvm/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <string>
+#include <cstring>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+ return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+ return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+ return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+ return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+};
+
+const char*
+_generic_error_category::name() const {
+ return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return std::string("unspecified generic_category error");
+#endif // ELAST
+ return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+ static _generic_error_category s;
+ return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+
+const char*
+_system_error_category::name() const {
+ return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+ static _system_error_category s;
+ return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+ return generic_category();
+#else
+ return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+ return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+ return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
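As a rough usage sketch of the categories defined above (assuming the libc++-derived interface declared by llvm/Support/system_error.h in this revision), a caller wraps an errno value and asks the category for its text:

#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdio>

static void report(int err) {
  // Construct against the generic (errno-style) category defined above.
  llvm::error_code ec(err, llvm::generic_category());
  // message() ends up in _do_message::message(), i.e. sys::StrError(err).
  std::printf("%s error %d: %s\n",
              ec.category().name(), ec.value(), ec.message().c_str());
}
// report(ENOENT) would print something like:
//   generic error 2: No such file or directory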
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
new file mode 100644
index 000000000000..08dc340f8541
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -0,0 +1,72 @@
+//===-- ARM.h - Top-level interface for ARM representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMAsmPrinter;
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class MachineInstr;
+class MCCodeEmitter;
+class MCInst;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class TargetAsmBackend;
+class formatted_raw_ostream;
+
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
new file mode 100644
index 000000000000..cf333ccd49ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -0,0 +1,251 @@
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
+
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
+// to just not use them.
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+/// Some instructions update CPSR partially, which can add a false dependency
+/// for out-of-order implementations such as Cortex-A9, unless each individual
+/// bit is mapped to a separate physical register. Avoid partial CPSR updates
+/// for these processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+ "Supports v7 DSP instructions in Thumb2.">;
+
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
+
+// ARM ISAs.
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+include "ARMSchedule.td"
+
+// ARM processor families.
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
+def : ProcNoItin<"arm710t", [HasV4TOps]>;
+def : ProcNoItin<"arm720t", [HasV4TOps]>;
+def : ProcNoItin<"arm9", [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm920", [HasV4TOps]>;
+def : ProcNoItin<"arm920t", [HasV4TOps]>;
+def : ProcNoItin<"arm922t", [HasV4TOps]>;
+def : ProcNoItin<"arm940t", [HasV4TOps]>;
+def : ProcNoItin<"ep9312", [HasV4TOps]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
+def : ProcNoItin<"arm1020t", [HasV5TOps]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm10e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
+def : ProcNoItin<"xscale", [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+ FeatureDB]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V7a Processors.
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
+def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureVFP2,
+ FeatureVFPOnlySP]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo;
+
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 000000000000..595708fa7881
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,595 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getAddrOpcStr(AddrOpc Op) {
+ return Op == sub ? "-" : "";
+ }
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored as three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
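A couple of worked values make the rotate-based encoding above concrete. This is an illustrative check only (it assumes this header is on the include path), not part of the original file.

#include "ARMAddressingModes.h"
#include <cassert>

static void checkSOImmExamples() {
  using namespace llvm::ARM_AM;
  // 0xFF000000 is 0xFF rotated right by 8, so it fits one shifter_op.
  assert(getSOImmValRotate(0xFF000000u) == 8);
  assert(getSOImmVal(0xFF000000u) == 0x4FF);   // imm8 = 0xFF, rot field = 8/2
  // 0x00FF00FF needs two 8-bit chunks, so the single-op encoding fails.
  assert(getSOImmVal(0x00FF00FFu) == -1);
}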
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with a shifted 8-bit immediate, the mask leaves zero.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with a shifted 16-bit immediate, the mask leaves zero.
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+
+ /// getT2SOImmValSplat - Return the 12-bit encoded representation
+ /// if the specified value can be obtained by splatting the low 8 bits
+ /// into every other byte or every byte of a 32-bit value. i.e.,
+ /// 00000000 00000000 00000000 abcdefgh control = 0
+ /// 00000000 abcdefgh 00000000 abcdefgh control = 1
+ /// abcdefgh 00000000 abcdefgh 00000000 control = 2
+ /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
+ /// Return -1 if none of the above apply.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValSplatVal(unsigned V) {
+ unsigned u, Vs, Imm;
+ // control = 0
+ if ((V & 0xffffff00) == 0)
+ return V;
+
+ // If the value is zeroes in the first byte, just shift those off
+ Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+ // Any passing value only has 8 bits of payload, splatted across the word
+ Imm = Vs & 0xff;
+ // Likewise, any passing values have the payload splatted into the 3rd byte
+ u = Imm | (Imm << 16);
+
+ // control = 1 or 2
+ if (Vs == u)
+ return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+ // control = 3
+ if (Vs == (u | (u << 8)))
+ return (3 << 8) | Imm;
+
+ return -1;
+ }
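Worked values for the four control cases above, traced by hand through this routine; an illustrative comment only.

// getT2SOImmValSplatVal(0x000000AB) == 0x0AB   (control = 0)
// getT2SOImmValSplatVal(0x00AB00AB) == 0x1AB   (control = 1)
// getT2SOImmValSplatVal(0xAB00AB00) == 0x2AB   (control = 2)
// getT2SOImmValSplatVal(0xABABABAB) == 0x3AB   (control = 3)
// getT2SOImmValSplatVal(0x12345678) == -1      (no splat pattern applies)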
+
+ /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
+ /// specified value is a rotated 8-bit value. Return -1 if no rotation
+ /// encoding is possible.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmValRotateVal(unsigned V) {
+ unsigned RotAmt = CountLeadingZeros_32(V);
+ if (RotAmt >= 24)
+ return -1;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ if ((rotr32(0xff000000U, RotAmt) & V) == V)
+ return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+ return -1;
+ }
+
+ /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
+ /// encoding for it. If not, return -1.
+ /// See ARM Reference Manual A6.3.2.
+ static inline int getT2SOImmVal(unsigned Arg) {
+ // If 'Arg' is an 8-bit splat, then get the encoded value.
+ int Splat = getT2SOImmValSplatVal(Arg);
+ if (Splat != -1)
+ return Splat;
+
+ // If 'Arg' can be handled with a single shifter_op return the value.
+ int Rot = getT2SOImmValRotateVal(Arg);
+ if (Rot != -1)
+ return Rot;
+
+ return -1;
+ }
+
+ static inline unsigned getT2SOImmValRotate(unsigned V) {
+ if ((V & ~255U) == 0) return 0;
+ // Use CTZ to compute the rotate amount.
+ unsigned RotAmt = CountTrailingZeros_32(V);
+ return (32 - RotAmt) & 31;
+ }
+
+ static inline bool isT2SOImmTwoPartVal (unsigned Imm) {
+ unsigned V = Imm;
+ // Passing values can be any combination of splat values and shifter
+ // values. If this can be handled with a single shifter or splat, bail
+ // out. Those should be handled directly, not with a two-part val.
+ if (getT2SOImmValSplatVal(V) != -1)
+ return false;
+ V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Likewise, try masking out a splat value first.
+ V = Imm;
+ if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
+ V &= ~0xff00ff00U;
+ else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
+ V &= ~0x00ff00ffU;
+ // If what's left can be handled as an immediate, accept.
+ if (getT2SOImmVal(V) != -1) return true;
+
+ // Otherwise, do not accept.
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immediate cannot be encoded as a two-part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth operand 16-17 encodes the index mode.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
+ }
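To see the packing in action, a short illustrative round trip through the helpers above (again assuming this header is on the include path):

#include "ARMAddressingModes.h"
#include <cassert>

static void checkAM2Packing() {
  using namespace llvm::ARM_AM;
  // Encode "subtract, offset 12, lsl shift, offset (non-indexed) mode".
  unsigned Opc = getAM2Opc(sub, /*Imm12=*/12, lsl);
  assert(Opc == 0x500Cu);                // 12 | (1 << 12) | (lsl << 13)
  assert(getAM2Offset(Opc) == 12);
  assert(getAM2Op(Opc) == sub);
  assert(getAM2ShiftOpc(Opc) == lsl);
  assert(getAM2IdxMode(Opc) == 0);
}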
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+ // index mode.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
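One illustrative round trip through the encode/decode pair above, with values traced by hand:

//   createNEONModImm(/*OpCmode=*/0x2, /*Val=*/0xAB) == 0x2AB
//   decodeNEONModImm(0x2AB, EltBits)                == 0xAB00, EltBits == 32
// i.e. OpCmode 0b00010 selects a 32-bit element with only byte 1 set.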
+
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 000000000000..5e438a976732
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,516 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: a 16-bit immediate, but scattered into two chunks: bits 0-11 and 16-19.
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME: Thumb targets, different move constant targets..
+ return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+ return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if (isThumb()) {
+ // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+ // use 0x46c0 (which is a 'mov r8, r8' insn).
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(0xbf00);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xe1a00000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
+ "Out of range pc-relative fixup value!");
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like
+ // the other hi16 fixup?
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ // The value comes in as the whole thing, not just the portion required
+ // for this fixup, so we need to mask off the bits not handled by this
+ // portion (lo vs. hi).
+ Value &= 0xffff;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ assert(ARM_AM::getSOImmVal(Value) != -1 &&
+ "Out of range pc-relative fixup value!");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 15;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0x3fffff & ((Value - 4) >> 1);
+ Binary = (Value & 0x7ff) << 16; // Low imm11 value.
+ Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+ // positions in the destination opcode. x = unchanged, I = immediate value
+ // bit, S = sign extension bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0xfffff & ((Value - 2) >> 2);
+ Binary = (Value & 0x3ff) << 17; // Low imm10L value.
+ Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
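Since adjustFixupValue is file-static, here is a standalone sketch of just the MOVW bit-scatter it performs for fixup_arm_movw_lo16, with a hand-checked example; illustrative only, not part of the original file.

#include <cassert>

// Re-derives (Hi4 << 16) | Lo12 from the case above; assumes Value fits 16 bits.
static unsigned scatterMovwImm16(unsigned Value) {
  unsigned Hi4  = (Value & 0xF000) >> 12;   // lands in inst{19-16}
  unsigned Lo12 =  Value & 0x0FFF;          // lands in inst{11-0}
  return (Hi4 << 16) | Lo12;
}

static void checkMovwScatter() {
  assert(scatterMovwImm16(0x1234) == 0x10234);
}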
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
+ : ARMAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ true);
+ }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = 4; // FIXME: 2 for Thumb
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T), Subtype(st) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ if (TheTriple.getArchName() == "armv4t" ||
+ TheTriple.getArchName() == "thumbv4t")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+ else if (TheTriple.getArchName() == "armv5e" ||
+ TheTriple.getArchName() == "thumbv5e")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+ else if (TheTriple.getArchName() == "armv6" ||
+ TheTriple.getArchName() == "thumbv6")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
+ }
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on ARM");
+
+ return new ELFARMAsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 000000000000..dbc3ee41f3da
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1834 @@
+//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMAddressingModes.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
+
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
+
+ public:
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ break;
+ /* GAS requires .fpu to be emitted regardless of EABI attribute */
+ case ARMBuildAttrs::Advanced_SIMD_arch:
+ case ARMBuildAttrs::VFP_arch:
+ Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ break;
+ default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
+ }
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallString<64> Contents;
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor("") { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
+ }
+
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ // FIXME: should be ULEB
+ Contents += Attribute;
+ Contents += Value;
+ }
+
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ Contents += Attribute;
+ Contents += UppercaseString(String);
+ Contents += 0;
+ }
+
+ void Finish() {
+ const size_t ContentsSize = Contents.size();
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ Streamer.EmitBytes(Contents, 0);
+
+ Contents.clear();
+ }
+ };
+
+} // end of anonymous namespace
+
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ AsmPrinter::EmitDwarfRegOp(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
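+      // For example, S5 has SReg = 5 (odd), so it is described as
+      // DW_OP_regx(258) DW_OP_bit_piece(32, 32), i.e. the upper half of D2.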
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
+ }
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
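+      // For example, Q3 (QReg = 3) is described as DW_OP_regx(262) DW_OP_piece(8)
+      // DW_OP_regx(263) DW_OP_piece(8), i.e. D6 followed by D7.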
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+ }
+ }
+}
+
+void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ if (AFI->isThumbFunction()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(CurrentFnSym);
+ }
+
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// runOnMachineFunction - This uses the EmitInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
+ switch (MO.getType()) {
+ default:
+ assert(0 && "<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << ARMInstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#';
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF == ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF == ARMII::MO_HI16))
+ O << ":upper16:";
+ O << Imm;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
+ O << *Mang->getSymbol(GV);
+
+ printOffset(MO.getOffset(), O);
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ if (TF == ARMII::MO_PLT)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ }
+}
+
+//===--------------------------------------------------------------------===//
+
+MCSymbol *ARMAsmPrinter::
+GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::
+GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid << '_' << uid2;
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
+}
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Print as a memory address.
+ if (MI->getOperand(OpNum).isReg()) {
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
+ return false;
+ }
+ // Fallthrough
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
+ printOperand(MI, OpNum, O);
+ return false;
+ case 'y': // Print a VFP single precision register as indexed double.
+      // This uses the ordering of the alias table to get the first 'd' register
+      // that overlaps the 's' register. Note that ARM::S0 happens to have an odd
+      // enum value, hence the odd modulus check below.
+ if (MI->getOperand(OpNum).isReg()) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) <<
+ (((Reg % 2) == 1) ? "[0]" : "[1]");
+ return false;
+ }
+ return true;
+ case 'B': // Bitwise inverse of integer or symbol without a preceding #.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << ~(MI->getOperand(OpNum).getImm());
+ return false;
+ case 'L': // The low 16 bits of an immediate constant.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << (MI->getOperand(OpNum).getImm() & 0xffff);
+ return false;
+ case 'M': { // A register range suitable for LDM/STM.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned RegBegin = MO.getReg();
+      // This takes advantage of the two-operand form of ldm/stm and of the fact
+      // that the registers we need are already operands of the inline asm
+      // statement.
+
+ O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+
+      // FIXME: The register allocator may not have given us the registers in
+      // sequence, and they may not even be in ascending order. Fixing this
+      // will require changes in the register allocator that will need to be
+      // propagated down here if the operands change.
+ unsigned RegOps = OpNum + 1;
+ while (MI->getOperand(RegOps).isReg()) {
+ O << ", "
+ << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
+ RegOps++;
+ }
+
+ O << "}";
+
+ return false;
+ }
+ // These modifiers are not yet supported.
+ case 'p': // The high single-precision register of a VFP double-precision
+ // register.
+ case 'e': // The low doubleword register of a NEON quad register.
+ case 'f': // The high doubleword register of a NEON quad register.
+ case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
+ case 'Q': // The least significant register of a pair.
+ case 'R': // The most significant register of a pair.
+ case 'H': // The highest-numbered register of a pair.
+ return true;
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ case 'A': // A memory operand for a VLD1/VST1 instruction.
+ default: return true; // Unknown modifier.
+ case 'm': // The base register of a memory operand.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ O << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg());
+ return false;
+ }
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
+ return false;
+}
+
+void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
+ // Declare all the text sections up front (before the DWARF sections
+ // emitted by AsmPrinter::doInitialization) so the assembler will keep
+ // them together at the beginning of the object file. This helps
+      // avoid out-of-range branches that are due to a fundamental limitation of
+ // the way symbol offsets are encoded with the current Darwin ARM
+ // relocations.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(
+ getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextSection());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+ if (RelocM == Reloc::DynamicNoPIC) {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__symbol_stub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 12, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ } else {
+ const MCSection *sect =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
+ MCSectionMachO::S_SYMBOL_STUBS,
+ 16, SectionKind::getText());
+ OutStreamer.SwitchSection(sect);
+ }
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
+ }
+ }
+
+ // Use unified assembler syntax.
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+
+  // Emit ARM Build Attributes
+  if (Subtarget->isTargetELF())
+    emitAttributes();
+}
+
+void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetDarwin()) {
+ // All darwin targets use mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output non-lazy-pointers for external and common global variables.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+  /* GAS expects .fpu to be emitted, regardless of VFP build attribute */
+ bool emitFPU = false;
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport()) {
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ emitFPU = true;
+ } else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+    // FIXME: figure out when this is emitted.
+    //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+    //                           ARMBuildAttrs::AllowWMMXv1);
+
+    // Add additional else-cases here.
+ } else if (CPUString == "xscale") {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (CPUString == "generic") {
+ // FIXME: Why these defaults?
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (Subtarget->hasNEON() && emitFPU) {
+    /* NEON is not exactly a VFP architecture, but GAS emits one of
+     * neon/vfpv3/vfpv2 for the .fpu parameter */
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ /* If emitted for NEON, omit from VFP below, since you can have both
+ * NEON and VFP in build attributes but only one .fpu */
+ emitFPU = false;
+ }
+
+ /* VFPv3 + .fpu */
+ if (Subtarget->hasVFP3()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+ /* VFPv2 + .fpu */
+ } else if (Subtarget->hasVFP2()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ }
+
+ /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+ * since NEON can have 1 (allowed) or 2 (MAC operations) */
+ if (Subtarget->hasNEON()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::Allowed);
+ }
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (NoInfsFPMath && NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+  // 8-byte alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
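+  //
+  // For the "aeabi" vendor subsection built by ObjectAttributeEmitter above,
+  // the section ends up as: 0x41, <section-length:4>, "aeabi", 0x00,
+  // ARMBuildAttrs::File (one byte), <size:4>, <attribute bytes>.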
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+  // Format version: 'A' (0x41).
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+  // FIXME: Remove this when Darwin transitions to @GOT-like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = ACPV->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+      // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+      // label, so just emit a local label and reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT)
+ OffsetWidth = 1;
+ else if (MI->getOpcode() == ARM::t2TBH_JT)
+ OffsetWidth = 2;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ MCInst BrInst;
+ BrInst.setOpcode(ARM::t2B);
+ BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ OutStreamer.EmitInstruction(BrInst);
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+ const MCSymbol *Label,
+ unsigned pred, unsigned ccreg,
+ MCContext &Ctx) {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(Dest));
+ Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ // Add predicate operands.
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+ unsigned Opcode) {
+ MCInst TmpInst;
+
+ // Emit the instruction as usual, just patch the opcode.
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(Opcode);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+  assert(MI->getFlag(MachineInstr::FrameSetup) &&
+         "Only instructions involved in frame setup code are allowed");
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+    // 2) for Thumb1 code we sometimes materialize the constant via a constpool
+    // load. Yes, this is pretty fragile, but for now I don't see a better
+    // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+  // Try to figure out the unwinding opcode from the src / dst regs.
+ if (MI->getDesc().mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
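+      // Fall through to gather the register list like the store-multiple
+      // cases below.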
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i)
+ RegList.push_back(MI->getOperand(i).getReg());
+ break;
+ case ARM::STR_PRE:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+      // Grab the constpool index and check whether it corresponds to the
+      // original or a cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+      // FIXME: Check the user; it should be an "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ OutStreamer.EmitPad(Offset);
+ } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ } else if (DstReg == ARM::SP) {
+ // FIXME: .movsp goes here
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+    } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetCPISymbol(MI->getOperand(1).getIndex()),
+ MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm()),
+ MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BXr9_CALL:
+ case ARM::BX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tBXr9_CALL:
+ case ARM::tBX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::BMOVPCRXr9_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
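+      // The PC is read as the instruction address plus 8 in ARM mode and plus
+      // 4 in Thumb2 mode, so fold that adjustment into the pc-relative
+      // expression.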
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::tADDhirr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::ADDrr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ // Add 's' bit operand (always reg0 for this)
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+    // The LPC0 label is referenced by a constant pool entry in order to get
+ // a PC-relative address at the ldr instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+ // Form and emit the load
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ MCInst LdStInst;
+ LdStInst.setOpcode(Opcode);
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ LdStInst.addOperand(MCOperand::CreateImm(0));
+ // Add predicate operands.
+ LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ OutStreamer.EmitInstruction(LdStInst);
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
+ /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+ /// in the function. The first operand is the ID# for this instruction, the
+ /// second is the index into the MachineConstantPool that this is, the third
+ /// is the size in bytes of this constant pool entry.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ EmitAlignment(2);
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+
+ return;
+ }
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBB);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBH);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVr)
+ EmitAlignment(2);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDrr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
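+    //
+    // The PC reads four bytes past the mov, so "adds $val, #7" leaves $val
+    // pointing at the final "movs r0, #1" with the Thumb bit set, which is
+    // where a longjmp through this buffer resumes.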
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDi3);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateImm(7));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tSTRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
+
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
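+    //
+    // With the PC reading eight bytes ahead, $val holds the address of the
+    // trailing "mov r0, #1", and the "add pc, pc, #0" skips over it on the
+    // direct return path, so setjmp itself returns 0.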
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(2));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
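+      // The offset immediate is #4; as above, the operand value is scaled by
+      // 4 for the tLDR instruction, hence the encoded value 1.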
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 000000000000..7741fc4b34e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,127 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void EmitConstantPool() {} // We emit constant pools ourselves.
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ /// EmitDwarfRegOp - Emit dwarf register operation.
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 000000000000..458f7dd1f784
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,294 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only define enum types, and
+// so are safe to include here.
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+  enum CondCodes {  // Meaning (integer)          Meaning (floating-point)
+    EQ,             // Equal                      Equal
+    NE,             // Not equal                  Not equal, or unordered
+    HS,             // Carry set                  >, ==, or unordered
+    LO,             // Carry clear                Less than
+    MI,             // Minus, negative            Less than
+    PL,             // Plus, positive or zero     >, ==, or unordered
+    VS,             // Overflow                   Unordered
+    VC,             // No overflow                Not unordered
+    HI,             // Unsigned higher            Greater than, or unordered
+    LS,             // Unsigned lower or same     Less than or equal
+    GE,             // Greater than or equal      Greater than or equal
+    LT,             // Less than                  Less than, or unordered
+    GT,             // Greater than               Greater than
+    LE,             // Less than or equal         <, ==, or unordered
+    AL              // Always (unconditional)     Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ SY = 15,
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
+
+namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ default: llvm_unreachable("Unknown memory operation");
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+  /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+  /// relocation containing the higher 16 bits of the non-lazy-ptr indirect
+  /// symbol, i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+  /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+  /// relocation containing the higher 16 bits of the PC relative address of
+  /// the non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+  /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 000000000000..649bd7d5ce3f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,2584 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
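+// The ARMBaseInstrInfo constructor below indexes this table by MLx opcode and
+// collects the expanded multiply and add/sub opcodes in MLxHazardOpcodes for
+// use by the ARM hazard recognizer.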
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfoImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
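+  // For example, a post-indexed "ldr r0, [r1], #4" becomes "ldr r0, [r1]"
+  // followed by "add r1, r1, #4"; for pre-indexed forms the add/sub is
+  // emitted before the memory access.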
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MCID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
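+// Note: following the TargetInstrInfo convention, returning false below means
+// the branch structure of the block was successfully analyzed, while
+// returning true means the block could not be handled.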
+bool
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((isJumpTableBranchOpcode(SecondLastOpc) ||
+ isIndirectBranchOpcode(SecondLastOpc)) &&
+ isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
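+  // Otherwise CC1 subsumes CC2 only when every processor state satisfying CC2
+  // also satisfies CC1, e.g. HS (carry set) covers HI (carry set and zero
+  // clear).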
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
+ return false;
+
+ if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
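+  // The eh_sjlj pseudo sizes below correspond to the fixed instruction
+  // sequences emitted by the ARM asm printer, e.g. 4 ARM instructions * 4
+  // bytes == 16 for Int_eh_sjlj_longjmp and 5 Thumb instructions * 2 bytes
+  // == 10 for tInt_eh_sjlj_longjmp.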
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+    // jumptable. The size is 4 + 4 * the number of entries. For TBB, each
+    // entry is one byte; for TBH, each entry is two bytes.
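+    // For example, a t2TBB_JT table with 5 targets is padded to 6 one-byte
+    // entries (see below) and reported as 6 * 1 + 4 == 10 bytes.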
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+    // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ return 0; // Not reached
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
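+  // VORRq implements a Q-register move as "vorr qd, qm, qm", so the source
+  // register is added a second time below.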
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSTRspi:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+void ARMBaseInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tLDRspi:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
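+// Each pc-relative use of a machine constant pool value needs its own PIC
+// label id, so rematerialization and duplication create a fresh constant pool
+// entry rather than reusing the original one.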
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
+  // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+}
+
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+      // Check if the loaded values, e.g. a constantpool or a global address,
+      // are the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
+ }
+
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
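+
+// Illustration (example offsets only): for two LDRi12 loads off the same
+// base with Offset1 = 0 and Offset2 = 16, (16 - 0) / 8 = 2 is within the
+// limit of 64, the opcodes match, and with NumLoads = 1 fewer than three
+// loads have been scheduled after Load1, so the hook returns true and the
+// two loads are clustered.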
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // here because of the special treatment of IT instructions below; otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. The boundary should be the
+ // actual instruction preceding the dbg_value instruction(s), just as it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ if (!NumCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned UnpredCost = Probability.getNumerator() * NumCycles;
+ UnpredCost /= Probability.getDenominator();
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (NumCycles + ExtraPredCycles) <= UnpredCost;
+}
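+
+// Worked example (all numbers assumed for illustration): with a branch
+// probability of 1/2, NumCycles = 4, ExtraPredCycles = 1, and a
+// misprediction penalty of 10 cycles, UnpredCost = 1 * 4 / 2 + 1 + 10 / 10
+// = 4 (integer division throughout), while the predicated cost is
+// 4 + 1 = 5, so if-conversion is rejected.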
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ const BranchProbability &Probability) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ unsigned TUnpredCost = Probability.getNumerator() * TCycles;
+ TUnpredCost /= Probability.getDenominator();
+
+ uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
+ unsigned FUnpredCost = Comp * FCycles;
+ FUnpredCost /= Probability.getDenominator();
+
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+
+int llvm::getMatchingCondBranchOpcode(int Opc) {
+ if (Opc == ARM::B)
+ return ARM::Bcc;
+ else if (Opc == ARM::tB)
+ return ARM::tBcc;
+ else if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
+
+ llvm_unreachable("Unknown unconditional branch opcode!");
+ return 0;
+}
+
+
+void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from the offset; clear them.
+ NumBytes &= ~ThisVal;
+
+ assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ BaseReg = DestReg;
+ }
+}
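+
+// Illustration (example constant only): NumBytes = 0x1001 is not a valid
+// single SO-immediate, so the loop above peels off one rotated 8-bit chunk
+// per iteration and emits two adds, one of 0x1 and one of 0x1000, with
+// BaseReg rewired to DestReg after the first one.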
+
+bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(FrameRegIdx+1);
+ Offset = 0;
+ return true;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getSOImmVal(Offset) != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, pull as much of the immediate into this ADDri/SUBri
+ // as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from the offset; clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = FrameRegIdx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Can't fold any offset even if it's zero.
+ return false;
+ case ARMII::AddrMode5: {
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Attempt to fold the address computation if the opcode has offset bits.
+ if (NumBits > 0) {
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
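+
+// Illustration (example offset only): for a load using AddrMode_i12
+// (NumBits = 12, Scale = 1), an offset of 4100 does not fit in the 12-bit
+// field, so 4100 & 4095 = 4 is folded into the immediate operand, 4096 is
+// left in Offset for the caller to materialize, and the function returns
+// false.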
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::t2CMPri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
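+
+// Illustration (example operands only): "CMPri r0, 0" is reported as
+// SrcReg = r0, CmpMask = ~0, CmpValue = 0, while "TSTri r1, 255" is
+// reported as SrcReg = r1, CmpMask = 255, CmpValue = 0; any other compare
+// form is left unanalyzed and the hook returns false.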
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool ARMBaseInstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
+ B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg()) continue;
+
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR. If its condition code requires
+ // checking the V bit, then this transformation is not safe. If we can't
+ // find the CPSR use (i.e. it is used in another block), it's also not
+ // safe to perform the optimization.
+ bool isSafe = false;
+ I = CmpInstr;
+ E = MI->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // The condition code operand immediately precedes the CPSR operand.
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ switch (CC) {
+ default:
+ isSafe = true;
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
+ }
+ }
+
+ if (!isSafe)
+ return false;
+
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
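+
+// Illustration (schematic, example registers only): a "SUBri r0, r1, 1"
+// followed by "CMPri r0, 0" in the same block is rewritten, provided the
+// checks above pass (no intervening CPSR access and no flag user that needs
+// the V bit, i.e. VS, VC, GE, LT, GT or LE), by flipping the SUBri's
+// optional cc_out operand to a CPSR def so it becomes a flag-setting
+// subtract, and the CMPri is erased.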
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ *UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
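+
+// Illustration (example constant only): with r1 = MOVi32imm 0xF000F and a
+// single use r2 = ADDrr r0, r1, the constant splits into the two
+// SO-immediates 0xF and 0xF0000, so the pair is rewritten as two ADDri
+// additions of those parts and the MOVi32imm is erased.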
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+
+ // The number of uOps for a load / store multiple is determined by the number
+ // of registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then the AGU takes an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
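+
+// Illustration (example count only): a load multiple of 5 registers, e.g.
+// an LDMIA restoring 5 GPRs, is modeled on Cortex-A8 as 5 / 2 = 2 uops
+// plus 1 for the odd register, i.e. 3 (issued 2, 2, 1); on Cortex-A9
+// likewise as 3; and on other cores as the worst case of 5.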
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+ // If there is an odd number of 'S' registers or the address is not 64-bit
+ // aligned, then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
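+
+// Illustration (example register number only): for RegNo = 3, Cortex-A8
+// yields 3 / 2 + 1 = 2 plus 1 for the odd register, i.e. a def cycle of 3;
+// Cortex-A9 starts from 3 and adds 1 only for an odd number of 'S'
+// registers or a DefAlign below 8; other cores assume the worst case,
+// 3 + 2 = 5.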
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseMCID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+ // If there is an odd number of 'S' registers or the address is not 64-bit
+ // aligned, then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there is an odd number of registers or the address is not 64-bit
+ // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
+
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction; the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefMCID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def; assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseMCID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction, so we can't use DefIdx here. Just use
+ // the first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
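+
+// Illustration (example cycles only): if the def operand becomes available
+// in cycle 4 (DefCycle = 4) and the use reads its operand in cycle 2
+// (UseCycle = 2), the operand latency is 4 - 2 + 1 = 3, reduced to 2 when
+// the itinerary records a pipeline forwarding path between the two
+// instructions.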
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ if (DefMO.getReg() == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseMCID.isBranch())
+ return 0;
+ }
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8_UPD:
+ case ARM::VLD1DUPq16_UPD:
+ case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8_UPD:
+ case ARM::VLD2DUPd16_UPD:
+ case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefMCID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefMCID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
+ if (Subtarget.isCortexA9())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Class = MCID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR-updating
+ // instructions; this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // Cortex-A8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
new file mode 100644
index 000000000000..507e8974bf7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -0,0 +1,503 @@
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINSTRUCTIONINFO_H
+#define ARMBASEINSTRUCTIONINFO_H
+
+#include "ARM.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This five-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+ // IndexMode - Unindexed, pre-indexed, or post-indexed are valid for load
+ // and store ops only. The generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+ // Miscellaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have an Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set fields while generating
+ // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
+
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
+ const ARMSubtarget &Subtarget;
+
+protected:
+ // Can be only subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
+public:
+ // Return the non-pre/post-incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ const ARMSubtarget &getSubtarget() const { return Subtarget; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ }
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+ /// difference between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+ /// should be scheduled together. On some targets if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return NumCycles == 1;
+ }
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpMask, int &CmpValue) const;
+
+ /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction; try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+private:
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const MCInstrDesc &UseMCID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MCInstrDesc &DefMCID,
+ unsigned DefIdx, unsigned DefAlign,
+ const MCInstrDesc &UseMCID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+ /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+ /// addition / subtraction opcode to expand to. It sets 'HasLane' to true for
+ /// MLx instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+ /// will cause stalls when scheduled after (within a 4-cycle window) a fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
+};
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
+ bool isDead = false) {
+ return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
+}
+
+static inline
+const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+static inline
+bool isUncondBranchOpcode(int Opc) {
+ return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
+}
+
+static inline
+bool isCondBranchOpcode(int Opc) {
+ return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
+}
+
+static inline
+bool isJumpTableBranchOpcode(int Opc) {
+ return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd ||
+ Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT;
+}
+
+static inline
+bool isIndirectBranchOpcode(int Opc) {
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = 0);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 000000000000..ba422952ac1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1287 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(), TII(tii), STI(sti),
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
+}
+
+const unsigned*
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ // The Darwin ABI deviates from the standard ARM ABI. R9 is not a callee-saved
+ // register.
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
+ if (TFI->hasFP(MF))
+ Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
+ return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && TFI->hasFP(MF))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
+ // S sub-registers.
+ if (A->getSize() == 8) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::DPR_8RegClass;
+ assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+ if (A == &ARM::DPR_8RegClass)
+ return A;
+ return &ARM::DPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
+ return &ARM::QPR_VFP2RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
+ return 0;
+}
+
+bool
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return false;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
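+
+// Worked example of the logic above (values follow directly from the code):
+// combining {dsub_0, dsub_1} on a 16-byte register class (Size == 128, e.g.
+// QPR) returns true with NewSubIdx == 0, i.e. the D pair covers the whole
+// register, while the same pair on a 32-byte class (Size == 256, e.g. QQPR)
+// returns true with NewSubIdx == ARM::qsub_0.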
+
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::GPRRegisterClass;
+}
+
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
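+
+// Worked example: for the GPR class in a function that has a frame pointer
+// and a reserved R9, the formula above yields 10 - 1 - 1 == 8 registers;
+// without either restriction the limit is 10.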
+
+/// getRawAllocationOrder - Returns the register allocation order for a
+/// specified register class with a target-dependent hint.
+ArrayRef<unsigned>
+ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+ // We only support even/odd hints for GPR and rGPR.
+ if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ return RC->getRawAllocationOrder(MF);
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven1);
+ else
+ return ArrayRef<unsigned>(GPREven4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven2);
+ else
+ return ArrayRef<unsigned>(GPREven5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPREven3);
+ else
+ return ArrayRef<unsigned>(GPREven6);
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return RC->getRawAllocationOrder(MF);
+
+ if (!TFI->hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd1);
+ else
+ return ArrayRef<unsigned>(GPROdd4);
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd2);
+ else
+ return ArrayRef<unsigned>(GPROdd5);
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return ArrayRef<unsigned>(GPROdd3);
+ else
+ return ArrayRef<unsigned>(GPROdd6);
+ }
+ }
+ return RC->getRawAllocationOrder(MF);
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register on success, 0 otherwise.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
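+
+// Example (see getRegisterPairOdd / getRegisterPairEven below): for a given
+// MachineFunction MF, ResolveRegAllocHint(ARMRI::RegPairOdd, ARM::R0, MF)
+// yields ARM::R1, while a hint whose partner register is reserved (e.g. R8
+// when R9 is reserved) resolves to 0, i.e. no usable hint.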
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one of an even / odd register pair and it has now been changed
+ // (e.g. coalesced) into a different register, the allocation hint of the
+ // other register of the pair must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // CortexA9 has a Write-after-write hazard for NEON registers.
+ if (!STI.isCortexA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid reusing S, D, and Q registers.
+ // Don't increase register pressure for QQ and QQQQ.
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and Thumb1 allows positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach the locals. If a function has a
+ // smallish frame, it's less likely to have lots of spill and callee-saved
+ // space, so everything is more likely to be within range of the frame
+ // pointer. If the estimate is wrong, the scavenger will still make the
+ // access work; it just won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
+}
+
+unsigned ARMBaseRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return FramePtr;
+ return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, 0);
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ return ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
+ case ARM::R9:
+ return isReservedReg(MF, ARM::R9) ? 0 : ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ case ARM::D17:
+ return ARM::D16;
+ case ARM::D19:
+ return ARM::D18;
+ case ARM::D21:
+ return ARM::D20;
+ case ARM::D23:
+ return ARM::D22;
+ case ARM::D25:
+ return ARM::D24;
+ case ARM::D27:
+ return ARM::D26;
+ case ARM::D29:
+ return ARM::D28;
+ case ARM::D31:
+ return ARM::D30;
+ }
+
+ return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ return ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
+ case ARM::R8:
+ return isReservedReg(MF, ARM::R9) ? 0 : ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ case ARM::D16:
+ return ARM::D17;
+ case ARM::D18:
+ return ARM::D19;
+ case ARM::D20:
+ return ARM::D21;
+ case ARM::D22:
+ return ARM::D23;
+ case ARM::D24:
+ return ARM::D25;
+ case ARM::D26:
+ return ARM::D27;
+ case ARM::D28:
+ return ARM::D29;
+ case ARM::D30:
+ return ARM::D31;
+ }
+
+ return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C =
+ ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
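+
+// Worked example of the round-up above: with an 8-byte stack alignment and a
+// 20-byte outgoing argument area, Amount becomes (20 + 8 - 1) / 8 * 8 == 24.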
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer.
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
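+
+// Worked numeric example of the estimate above: for an ARM function with an
+// incoming SP-relative offset of -16 and a 64-byte local frame, the
+// FP-relative guess is -16 - 8 - 80 == -104 and the SP-relative guess is
+// 16 + 64 + 128 == 208. If either guess is legal for the instruction (and the
+// corresponding base register is usable), no virtual base register is
+// requested; otherwise the function returns true.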
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert(Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets and i12 only positive ones, so pick
+ // the appropriate encoding based on the sign of Offset.
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode_i12:
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
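+
+// Worked example: a Thumb1 SP-relative access (AddrModeT1_s: NumBits == 5,
+// Scale == 4, unsigned) is legal for total offsets that are multiples of 4
+// from 0 up to 31 * 4 == 124, whereas a Thumb2 i8-mode access with a negative
+// offset is legal down to -255.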
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned FrameReg;
+
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ }
+ if (Done)
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert((Offset ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+ "This code isn't needed if offset already handled!");
+
+ unsigned ScratchReg = 0;
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ if (Offset == 0)
+ // Must be addrmode4/6.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ else {
+ ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+ if (!AFI->isThumbFunction())
+ emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+ Offset, Pred, PredReg, TII);
+ }
+ // Update the original instruction to use the scratch register.
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 000000000000..b4b4059e7361
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,216 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEREGISTERINFO_H
+#define ARMBASEREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For Darwin we want r7 and lr to be next to each other.
+ return !isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // Darwin has this second area.
+ return isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
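+
+// Summary of the three predicates above: on Darwin, area 1 is r0-r7 plus
+// lr/sp/pc, area 2 is r8-r11, and area 3 is d8-d15; on non-Darwin targets
+// r8-r11 fall into area 1 and area 2 is empty.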
+
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
+ const ARMBaseInstrInfo &TII;
+ const ARMSubtarget &STI;
+
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
+ // Can only be subclassed.
+ explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &STI);
+
+ // Return the opcode that implements 'Op', or 0 if there is no such opcode.
+ unsigned getOpcode(int Op) const;
+
+public:
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
+ bool hasBasePointer(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const { return BasePtr; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
+
+ /// Code Generation virtual methods...
+ virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+private:
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 000000000000..69eddf03ec94
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in the ARM ABI addenda document (ABI release 2.08).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ // ARM_ISA_use (=8), uleb128 and THUMB_ISA_use (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code may use all the IEEE 754-defined FP encodings
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 000000000000..ff7db1ff62ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64 (CanFail is false), do not fail; spill it instead.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
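+
+// Example of the APCS assignment above: for a call whose arguments are three
+// f64 values, the first is split across R0/R1 and the second across R2/R3;
+// the third finds no registers left, this routine returns false (CanFail is
+// true), and the CC_ARM_APCS table below assigns it to the stack via
+// CCAssignToStack<8, 4>.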
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64 (CanFail is false), do not fail; spill it instead.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
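+
+// Example of the AAPCS assignment above: an f64 lands in an aligned pair,
+// R0/R1 or R2/R3. If R0 was already taken by an earlier argument, the value
+// goes to R2/R3 and R1 is shadowed (left unused), matching the AAPCS
+// register-alignment rule; when no pair is left and spilling is required here
+// (the 2nd half of a v2f64), the value takes an 8-byte, 8-byte-aligned slot.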
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 000000000000..d2981c0af8ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,164 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match if the original alignment of the argument is Align.
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
+]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
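
To make the difference between the conventions concrete, here is a small illustration (not part of this patch) of where the parameters of one sample prototype land, assuming a soft-float call so the f64 travels in core registers, and assuming the APCS custom handler packs it into the next two consecutive GPRs:

    // Illustration only; placements follow the rules defined above.
    double g(int a,    // APCS: r0       AAPCS: r0
             double d, // APCS: r1:r2    AAPCS: r2:r3 (even pair, r1 is padding)
             int b);   // APCS: r3       AAPCS: stack, first 4-byte slot
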
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 000000000000..d6fca6277501
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1898 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter : public MachineFunctionPass {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ const ARMSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+ bool IsThumb;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+ public:
+ ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const ARMInstrInfo *)tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+ void emitDWordLE(uint64_t Binary);
+ void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+ void addPCLabel(unsigned LabelID);
+ void emitPseudoInstruction(const MachineInstr &MI);
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitSaturateInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0, as the values they return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
+ unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getMsbOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getSsatBitPosValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const {return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const;
+
+ /// Routines that handle operands which add machine relocations to be
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0) const;
+ };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+ MF.getTarget().getRelocationModel() == Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+ II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+ TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ JTI->Initialize(MF, IsPIC);
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return getARMRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV) const {
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ ACPV, MayNeedFarStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), ACPV,
+ MayNeedFarStub);
+ MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc,
+ intptr_t JTBase) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Binary) << "\n");
+ MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ llvm_unreachable("Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ break;
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+ assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+ const GlobalValue *GV = ACPV->getGV();
+ if (GV) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ isa<Function>(GV),
+ Subtarget->GVIsIndirectSymbol(GV, RelocM),
+ (intptr_t)ACPV);
+ } else {
+ emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ const Constant *CV = MCPE.Val.ConstVal;
+
+ DEBUG({
+ errs() << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ errs() << F->getName();
+ else
+ errs() << *CV;
+ errs() << '\n';
+ });
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType()->isFloatTy())
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType()->isDoubleTy())
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ } else {
+ llvm_unreachable("Unable to handle this constantpool entry!");
+ }
+ }
+}
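
For the plain float/double branches above, the emitted word is just the IEEE-754 bit pattern of the constant. A small standalone check (not part of this patch) of what bitcastToAPInt() yields for two simple values:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float F = 1.0f;
      double D = 2.5;
      uint32_t FW;
      uint64_t DW;
      std::memcpy(&FW, &F, sizeof(FW)); // IEEE-754 single: 0x3f800000
      std::memcpy(&DW, &D, sizeof(DW)); // IEEE-754 double: 0x4004000000000000
      std::printf("float 1.0  -> emitWordLE(0x%08x)\n", FW);
      std::printf("double 2.5 -> emitDWordLE(0x%016llx)\n",
                  (unsigned long long)DW);
    }
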
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
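
A worked example (standalone, not part of this patch): materializing 0x12345678 into r0 becomes a movw of the low half and a movt of the high half, each encoding its 16-bit immediate as imm4:imm12. The helpers below hard-code the usual ARM field positions (condition at bit 28, Rd at bit 12) instead of the ARMII shift constants.

    #include <cstdint>
    #include <cstdio>

    uint32_t encodeMovW(unsigned Rd, uint16_t Imm16, uint32_t Cond = 0xE) {
      uint32_t Binary = 0x30u << 20;         // movw: Insts{27-20} = 0b00110000
      Binary |= Cond << 28;                  // condition field (AL = 0b1110)
      Binary |= Rd << 12;                    // Rd
      Binary |= Imm16 & 0xFFF;               // Insts{11-0}  = imm12
      Binary |= ((Imm16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
      return Binary;
    }

    uint32_t encodeMovT(unsigned Rd, uint16_t Imm16, uint32_t Cond = 0xE) {
      uint32_t Binary = 0x34u << 20;         // movt: Insts{27-20} = 0b00110100
      Binary |= Cond << 28;
      Binary |= Rd << 12;
      Binary |= Imm16 & 0xFFF;
      Binary |= ((Imm16 >> 12) & 0xF) << 16;
      return Binary;
    }

    int main() {
      uint32_t Value = 0x12345678;
      std::printf("movw r0, #0x%04x -> 0x%08x\n", Value & 0xFFFF,
                  encodeMovW(0, Value & 0xFFFF)); // 0xe3005678
      std::printf("movt r0, #0x%04x -> 0x%08x\n", Value >> 16,
                  encodeMovT(0, Value >> 16));    // 0xe3401234
    }
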
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+ "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V1);
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(V2);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::RRX:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+ DEBUG(errs() << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n');
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::BXr9_CALL:
+ case ARM::BMOVPCRXr9_CALL: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly reads pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+
+ case ARM::MOVi32imm:
+ // Two instructions to materialize a constant.
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
+ break;
+
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::RRX:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+ const MCInstrDesc &MCID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
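
As a concrete instance of the bit patterns listed above (standalone sketch, not part of this patch): the register-controlled shifter operand `r2, lsl r3` of an instruction such as `add r0, r1, r2, lsl r3` puts Rm in bits 3-0, 0b0001 in bits 7-4 for LSL-by-register, and Rs in bits 11-8.

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned Rm = 2, Rs = 3;        // r2 shifted by r3
      uint32_t Shifter = 0;
      Shifter |= Rm;                  // Rm in bits 3-0
      Shifter |= 0x1u << 4;           // LSL by register: bits 7-4 = 0001
      Shifter |= Rs << 8;             // Rs in bits 11-8
      std::printf("shifter operand = 0x%03x\n", Shifter); // 0x312
    }
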
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+}
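
The rotate_imm:immed_8 split can be checked with a small standalone sketch (not part of this patch; encodeSOImm is a simplified stand-in for ARM_AM::getSOImmVal): 0x3FC is 0xFF rotated right by 30 bits, so it encodes as rotate_imm = 15 and immed_8 = 0xFF, while a value such as 0x101 is not representable at all.

    #include <cstdint>
    #include <cstdio>

    // Find (rotate_imm, immed_8) such that rotating immed_8 right by
    // 2*rotate_imm yields V; return the packed 12-bit field, or -1.
    int encodeSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotating V left by Rot undoes a rotate-right by Rot.
        uint32_t Imm8 = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
        if (Imm8 <= 0xFF)
          return ((Rot / 2) << 8) | Imm8;   // rotate_imm:immed_8
      }
      return -1;
    }

    int main() {
      std::printf("so_imm(0x3FC) = 0x%03x\n", encodeSOImm(0x3FC)); // 0xfff
      std::printf("so_imm(0x101) = %d\n", encodeSOImm(0x101));     // -1
    }
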
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+ const MCInstrDesc &MCID) const {
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i) {
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = MCID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+
+ if (MCID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(MCID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+ emitWordLE(Binary);
+}
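
For the BFC/BFI path above, a short standalone check (not part of this patch) of how lsb and msb fall out of the inverted-mask operand; the GCC/Clang builtins stand in for LLVM's CountTrailingZeros_32 and CountLeadingZeros_32, and the sample operand corresponds to a hypothetical `bfc r0, #8, #4`.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t InvMask = 0xFFFFF0FF;   // operand value: ~(mask of bits 11..8)
      uint32_t v = ~InvMask;           // 0x00000F00, the contiguous bitfield
      int lsb = __builtin_ctz(v);      // 8
      int msb = 31 - __builtin_clz(v); // 11
      std::printf("lsb = %d, msb = %d\n", lsb, msb);
    }
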
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not in immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+
+ // If this instr uses the scaled register offset/index encoding, set the
+ // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Skip LDRD and STRD's second operand.
+ if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
+ ++OpIdx;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instr is in register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= getARMRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instr is in immediate offset/index encoding, set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: llvm_unreachable("Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
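
A concrete instance of the sub-mode and register-list encoding above (standalone sketch, not part of this patch): for `ldmia sp!, {r4, r5, lr}`, IA sets U and clears P, the writeback sets W, and each listed register contributes one bit of the low halfword. The literal bit positions 23/24/21 are assumptions matching the comments rather than the ARMII constants, and the base register and opcode bits are omitted.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Binary = 0;
      Binary |= 1u << 23;                  // U = 1 (increment); P (bit 24) = 0
      Binary |= 1u << 21;                  // W = 1 (base register writeback)
      const unsigned Regs[] = {4, 5, 14};  // r4, r5, lr
      for (unsigned R : Regs)
        Binary |= 1u << R;                 // register list in bits 15-0
      std::printf("mode + reglist bits = 0x%08x\n", Binary); // 0x00a04030
    }
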
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, MCID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (MCID.getNumDefs() == 2)
+ Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiply instructions (e.g. MLA) have three src operands. Encode
+ // the third one as Rn (for multiply, that's in the same position as RdLo).
+ if (MCID.getNumOperands() > OpIdx &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // PKH instructions are finished at this point
+ if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
+ emitWordLE(Binary);
+ return;
+ }
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (MCID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ MCID.Opcode != ARM::SSAT16 &&
+ MCID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (MCID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.Opcode == ARM::TPsoft) {
+ llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+ }
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+ << '\n');
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Handle jump tables.
+ if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex =
+ (MCID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (MCID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
+ // The return register is LR.
+ Binary |= getARMRegisterNumbering(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+ RegD = getARMRegisterNumbering(RegD);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+ RegN = getARMRegisterNumbering(RegN);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+ RegM = getARMRegisterNumbering(RegM);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
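+
+// Illustrative sketch (kept out of the build): the single-precision register
+// split used by encodeVFPRd/Rn/Rm above, worked through for a hypothetical
+// S13 operand. An S-register number contributes its upper four bits to the
+// main register field and its low bit to the D/N/M extension bit.
+#if 0
+static void exampleSPRSplit() {
+ unsigned RegNum = 13;                      // S13
+ unsigned MainField = (RegNum & 0x1E) >> 1; // 6
+ unsigned ExtraBit = RegNum & 0x01;         // 1, goes to the D/N/M bit
+ (void)MainField; (void)ExtraBit;
+}
+#endif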
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & ARMII::D_BitShift) == 0 &&
+ (Binary & ARMII::N_BitShift) == 0 &&
+ (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Skip operand 0 of an instruction with base register update.
+ unsigned OpIdx = 0;
+ if (IsUpdating)
+ ++OpIdx;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+ // Set bit W(21)
+ if (IsUpdating)
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, OpIdx+2);
+
+ // Count the number of registers.
+ unsigned NumRegs = 1;
+ for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ // Bit 8 will be set if <list> is a list of consecutive 64-bit registers
+ // (e.g., D0-D3); otherwise it will be 0, for a list of 32-bit registers.
+ if (Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
+
+ emitWordLE(Binary);
+}
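+
+// Illustrative sketch (kept out of the build): the register-count field
+// computed above, assuming a hypothetical VLDM of d0-d3. Bit 8 of the encoding
+// marks a 64-bit register list, whose count is expressed in 32-bit words.
+#if 0
+static unsigned exampleVFPRegCountField(bool Is64BitList) {
+ unsigned NumRegs = 4;             // d0, d1, d2, d3
+ return Is64BitList ? NumRegs * 2  // 8: the count is given in 32-bit words
+                    : NumRegs;     // 4 for an s-register list
+}
+#endif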
+
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = getARMRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = getARMRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = getARMRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
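+
+// Illustrative sketch (kept out of the build): the double-precision register
+// split used by encodeNEONRd/Rn/Rm above, worked through for a hypothetical
+// D17 operand. The low four bits go into the main register field and the
+// fifth bit into the D/N/M extension bit.
+#if 0
+static void exampleDPRSplit() {
+ unsigned RegNum = 17;                  // D17
+ unsigned MainField = RegNum & 0xf;     // 1
+ unsigned ExtraBit = (RegNum >> 4) & 1; // 1, goes to the D/N/M bit
+ (void)MainField; (void)ExtraBit;
+}
+#endif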
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
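+
+// Illustrative sketch (kept out of the build): the ARM-to-Thumb rewrite above
+// applied to a hypothetical encoding value. The U bit moves from bit 24 to
+// bit 28, and the fixed 0xf2/0xf3 prefix becomes 0xef/0xff.
+#if 0
+static unsigned exampleNEONToThumb() {
+ unsigned ARMBinary = 0xf3001000;       // hypothetical ARM form, U = 1
+ unsigned UBit = (ARMBinary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (ARMBinary & 0xffffff); // 0xff001000
+}
+#endif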
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+ assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
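+
+// Illustrative sketch (kept out of the build): the lane-field computation
+// above for a hypothetical 16-bit element at lane 1. The element size is
+// already encoded by TableGen, so only the remaining index bits are folded
+// into the opc1/opc2 fields.
+#if 0
+static void exampleNEONLaneFields() {
+ unsigned LaneShift = 1;         // 16-bit elements
+ unsigned Lane = 1 << LaneShift; // lane 1 -> 2
+ unsigned Opc1 = Lane >> 2;      // 0, placed at bit 21
+ unsigned Opc2 = Lane & 3;       // 2, placed at bits 6:5
+ (void)Opc1; (void)Opc2;
+}
+#endif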
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = getARMRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
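+
+// Illustrative sketch (kept out of the build): how the packed modified
+// immediate above is scattered into the instruction word, worked through for
+// a hypothetical packed value of 0x0FF (Op = 0, Cmode = 0, abcdefgh = 0xFF).
+#if 0
+static unsigned exampleNEONModImmFields() {
+ unsigned Imm = 0x0FF;
+ unsigned Op = (Imm >> 12) & 1;     // 0 -> bit 5
+ unsigned Cmode = (Imm >> 8) & 0xf; // 0 -> bits 11:8
+ unsigned I = (Imm >> 7) & 1;       // 1 -> bit 24
+ unsigned Imm3 = (Imm >> 4) & 0x7;  // 7 -> bits 18:16
+ unsigned Imm4 = Imm & 0xf;         // 0xF -> bits 3:0
+ return (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; // 0x0107000f
+}
+#endif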
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 000000000000..f45ebdc53500
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1906 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
+ void InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &MF);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &MF) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
+ }
+ }
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp && "Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+ HasInlineAsm = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF.RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(MF, CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(MF);
+
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= UndoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(errs() << '\n'; dumpBBs());
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ MF.getConstantPool()->getConstants();
+
+ const TargetData &TD = *MF.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures.
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case ARM::tBR_JTr:
+ // A Thumb1 table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
+ // FIXME: Add a pseudo ALIGN instruction instead.
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ break;
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction is
+ // 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In Thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ ((Offset%4) != 0 || HasInlineAsm))
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4 != 0 || HasInlineAsm))
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || !Succ->begin()->isPHI())
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+
+ int delta = isThumb1 ? 2 : 4;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
+ // All BBOffsets following these blocks must be modified.
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 != 0) {
+ UserOffset -= 2;
+ TotalAdj = 2;
+ }
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0) {
+ TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // cause alignment change. In the worst case scenario this can cause the
+ // user's effective address to be subtracted by 2 and the CPE's address to
+ // be plus 2.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ }
+ return false;
+}
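+
+// Illustrative sketch (kept out of the build): the forward-displacement case
+// of the check above, worked through with hypothetical Thumb1 numbers. A user
+// at offset 102 (== 2 mod 4) is treated as if it were at 100, and a CPE trial
+// offset of 300 then fits easily within a 1020-byte tLDRpci range.
+#if 0
+static bool exampleOffsetIsInRange() {
+ unsigned UserOffset = 102, TrialOffset = 300, MaxDisp = 1020;
+ if (UserOffset % 4 != 0)
+   UserOffset -= 2;  // the hardware rounds the pc-relative base down
+ if (TrialOffset % 4 != 0)
+   TrialOffset += 2; // CPEs get rounded up to a multiple of 4
+ return TrialOffset - UserOffset <= MaxDisp; // 200 <= 1020 -> true
+}
+#endif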
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U) {
+ unsigned MaxDisp = U.MaxDisp;
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction.
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and
+/// the specified constant pool entry instruction fits in MI's displacement
+/// field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+
+ if (DoDump) {
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
+ for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust the padding as needed, a
+ // bit tricky. delta can be negative so don't use % on that.
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() && !HasInlineAsm) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = llvm::next(MBBI);
+ }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if a previously created clone of the CPE is in range, and if
+/// so, change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
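+
+// Illustrative sketch (kept out of the build): the arithmetic behind the
+// table above. Each branch has a signed immediate of B bits scaled by S, so
+// the largest forward displacement is ((1 << (B - 1)) - 1) * S bytes.
+#if 0
+static void exampleUncondBrDisp() {
+ unsigned tBMax = ((1 << 10) - 1) * 2;  // 11-bit imm, 2-byte units ->     2046
+ unsigned t2BMax = ((1 << 23) - 1) * 2; // 24-bit imm, 2-byte units -> 16777214
+ unsigned BMax = ((1 << 23) - 1) * 4;   // 24-bit imm, 4-byte units -> 33554428
+ (void)tBMax; (void)t2BMax; (void)BMax;
+}
+#endif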
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ WaterIter = IP;
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ if (FoundWaterThatWouldPad) {
+ WaterIter = IPThatWouldPad;
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is accounted for
+ // inside OffsetIsInRange.)
+ if (BBHasFallthrough(UserMBB) &&
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb1 ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb1). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ DEBUG(errs() << "Split in middle of big block\n");
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC.
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In Thumb1 mode, the size of the island may have been padded by two bytes
+ // to satisfy the alignment requirement, so the recorded size can still be 2
+ // even though the block is now empty; fold that remainder into Size as well.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
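+
+ // A small worked example of the check above: for an ARM branch at offset
+ // 0x1000 targeting a block at offset 0x1800, BrOffset = 0x1000 + 8 = 0x1008
+ // (the PC reads two instructions ahead), so the forward displacement is
+ // 0x1800 - 0x1008 = 0x7F8 = 2040 bytes and the branch is in range whenever
+ // MaxDisp >= 2040.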
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump;
+/// otherwise an extra intermediate branch instruction would be needed.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(errs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
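+
+ // Rough ranges involved in the change above: a Thumb1 tB reaches only about
+ // +/-2 KB, while tBfar, implemented with BL, gets
+ // MaxDisp = (1 << 21) * 2 = 0x400000 bytes (about 4 MB) at the cost of the
+ // instruction growing from 2 to 4 bytes, which is why BBSizes is bumped by 2.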
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
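+
+ // Size accounting sketch for the transformation above: one conditional
+ // branch is removed and a conditional plus an unconditional branch are
+ // added, so the net growth is the size of one unconditional branch, e.g.
+ // roughly +2 bytes with Thumb1 tB or +4 bytes with ARM B / Thumb2 t2B.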
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
+/// LR / restore LR to pc. FIXME: This is done here because it's only possible
+/// to do this if tBfar is not used.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ // First two operands are predicates.
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
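+
+ // In assembly terms the rewrite above is approximately
+ //   pop {pc}        ; tPOP_RET whose only explicit register is PC
+ // becoming
+ //   bx  lr          ; tBX_RET, with the predicate operands carried over
+ // which is only safe because LR was not clobbered by a tBfar far jump.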
+
+bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Shrink ADR and LDR from constantpool.
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned Opcode = U.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2LEApcrel:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLEApcrel;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ case ARM::t2LDRpci:
+ if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
+ NewOpc = ARM::tLDRpci;
+ Bits = 8;
+ Scale = 4;
+ }
+ break;
+ }
+
+ if (!NewOpc)
+ continue;
+
+ unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ // FIXME: Check if offset is multiple of scale if scale is not 4.
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ U.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = U.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2CPShrunk;
+ MadeChange = true;
+ }
+ }
+
+ MadeChange |= OptimizeThumb2Branches(MF);
+ MadeChange |= OptimizeThumb2JumpTables(MF);
+ return MadeChange;
+}
+
+bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
+ ImmBranch &Br = ImmBranches[i];
+ unsigned Opcode = Br.MI->getOpcode();
+ unsigned NewOpc = 0;
+ unsigned Scale = 1;
+ unsigned Bits = 0;
+ switch (Opcode) {
+ default: break;
+ case ARM::t2B:
+ NewOpc = ARM::tB;
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc: {
+ NewOpc = ARM::tBcc;
+ Bits = 8;
+ Scale = 2;
+ break;
+ }
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
+ continue;
+
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ // Check if the distance is within 126 bytes. Subtract 2 from the starting
+ // offset because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
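+
+ // Approximate ranges used above: tB covers ((1<<10)-1)*2 = 2046 bytes,
+ // tBcc covers ((1<<7)-1)*2 = 254 bytes, and CBZ/CBNZ only reach forward,
+ // up to 126 bytes (measured after subtracting 2 for the cmp that gets
+ // folded away).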
+
+/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible.
+bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid of some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true;
+ bool HalfWordOk = true;
+ unsigned JTOffset = GetOffsetOf(MI) + 4;
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ // Negative offset is not ok. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break;
+ }
+
+ if (ByteOk || HalfWordOk) {
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ bool BaseRegKill = MI->getOperand(0).isKill();
+ if (!BaseRegKill)
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
+ continue;
+
+ MachineInstr *AddrMI = PrevI;
+ bool OptOk = true;
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg) {
+ OptOk = false;
+ break;
+ }
+ if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
+ OptOk = false;
+ break;
+ }
+ }
+ if (!OptOk)
+ continue;
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != BaseReg)
+ OptOk = false;
+
+ if (!OptOk)
+ continue;
+
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
+ // is 2-byte aligned. For now, asm printer will fix it up.
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
+ OrigSize += TII->GetInstSizeInBytes(LeaMI);
+ OrigSize += TII->GetInstSizeInBytes(MI);
+
+ AddrMI->eraseFromParent();
+ LeaMI->eraseFromParent();
+ MI->eraseFromParent();
+
+ int delta = OrigSize - NewSize;
+ BBSizes[MBB->getNumber()] -= delta;
+ AdjustBBOffsetsAfter(MBB, -delta);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
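+
+ // Sketch of the transformation above: the jump-table address materialization
+ // (tLEApcrelJT / t2LEApcrelJT), the add that forms the entry address, and the
+ // indirect branch are all deleted and replaced by a single t2TBB_JT or
+ // t2TBH_JT. Byte tables reach forward up to ((1<<8)-1)*2 = 510 bytes and
+ // halfword tables up to ((1<<16)-1)*2 = 131070 bytes, which is what the
+ // ByteOk / HalfWordOk scan checks.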
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer that the target blocks for the jump table come after the jump
+ // instruction so that we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them so that this holds.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
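+
+ // Layout sketch for the adjustment above, since TB[BH] offsets are unsigned
+ // and therefore forward-only:
+ //   before:  [target BB] ... [BB with the jump-table branch]   (negative offset)
+ //   after:   ... [BB with the jump-table branch] [target BB or "b target" trampoline]
+ // where the trampoline block is created by AdjustJTTargetBlockForward below
+ // when the target block itself cannot be moved.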
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block.
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 000000000000..165a1d849ad5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,130 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind K,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)cval->getType()),
+ CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
+ const char *s, unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
+ CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
+ PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
+ ARMCP::ARMCPModifier Modif)
+ : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
+ CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
+ Modifier(Modif), AddCurrentAddress(false) {}
+
+const GlobalValue *ARMConstantPoolValue::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->CVal == CVal &&
+ CPV->LabelId == LabelId &&
+ CPV->PCAdjust == PCAdjust &&
+ CPV_streq(CPV->S, S) &&
+ CPV->Modifier == Modifier)
+ return i;
+ }
+ }
+
+ return -1;
+}
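+
+ // Example of the alignment test above: with a required Alignment of 4,
+ // AlignMask is 3, so an existing entry aligned to 8 is acceptable
+ // (8 & 3 == 0) while one aligned to 2 is rejected (2 & 3 == 2).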
+
+ARMConstantPoolValue::~ARMConstantPoolValue() {
+ free((void*)S);
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+}
+
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ CPV_streq(ACPV->S, S) &&
+ ACPV->Modifier == Modifier) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
+void ARMConstantPoolValue::dump() const {
+ errs() << " " << *this;
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (CVal)
+ O << CVal->getName();
+ else
+ O << S;
+ if (Modifier) O << "(" << getModifierText() << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 000000000000..d008811c40e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,122 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class Constant;
+class BlockAddress;
+class GlobalValue;
+class LLVMContext;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPLSDA
+ };
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent the PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // Kind of constant.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ // 8 for ARM, 4 for Thumb.
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
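+
+ // Illustrative use of PCAdjust (approximate ARM PIC sequence): the pool
+ // entry holds GV-(LPC0+8) and the generated code does
+ //   ldr r0, LCPI0_0
+ // LPC0:
+ //   add r0, pc, r0
+ // where reading pc at LPC0 yields LPC0+8 on ARM (LPC0+4 on Thumb), so the
+ // addition reconstructs the address of GV.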
+
+public:
+ ARMConstantPoolValue(const Constant *cval, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
+ ARMConstantPoolValue();
+ ~ARMConstantPoolValue();
+
+ const GlobalValue *getGV() const;
+ const char *getSymbol() const { return S; }
+ const BlockAddress *getBlockAddress() const;
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ }
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+ bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
+ bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+
+ virtual unsigned getRelocationInfo() const { return 2; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 000000000000..51e68b4553ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case ARM::reloc_arm_absolute:
+ case ARM::reloc_arm_relative:
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_jt_base:
+ case ARM::reloc_arm_pic_jt:
+ assert(0 && "unsupported ARM relocation type"); break;
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ default:
+ llvm_unreachable("unknown ARM relocation type"); break;
+ }
+ return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+ return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+ return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+ return 1;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 &&
+ "ARMELFWriterInfo::computeRelocation() not implemented");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 000000000000..1c4e5329ac61
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class ARMELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ ARMELFWriterInfo(TargetMachine &TM);
+ virtual ~ARMELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..94b72fdb9a7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,1326 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const MCInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addOperand(MO);
+ else
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructions, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
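+  // The table is sorted by PseudoOpc, so a binary search finds the matching
+  // pseudo opcode.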
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
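+/// SingleSpc returns consecutive D subregisters; EvenDblSpc and OddDblSpc
+/// return every other D subregister, starting at dsub_0 or dsub_1.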
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+
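+  // Expand the Q, QQ, or QQQQ destination into the individual D-register defs
+  // expected by the real VLD instruction.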
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
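+  // Expand the Q, QQ, or QQQQ source into the individual D-register operands
+  // expected by the real VST instruction.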
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
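+  // The table operand is a Q or QQ super-register; pass its D subregisters to
+  // the real VTBL/VTBX instruction.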
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
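+  // Pre-v6T2 ARM has no movw/movt, so an ARM-mode i32 immediate is split into
+  // two shifter-operand immediates and expanded to MOVi + ORRri.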
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
+
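+  // Otherwise emit a movw/movt pair (or the Thumb2 equivalents), either with
+  // the low and high halves of the immediate or with a global address carrying
+  // LO16/HI16 target flags.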
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCr:
+ case ARM::MOVCCr: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCs: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addReg(MI.getOperand(3).getReg(),
+ getKillRegState(MI.getOperand(3).isKill()))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addReg(MI.getOperand(6).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi16: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::t2MOVCCi:
+ case ARM::MOVCCi: {
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MVNCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
+ } else {
+ llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+    // These are just fancy MOVs instructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addReg(0)
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+    // This encodes as "MOVs Rd, Rm, rrx".
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tTPsoft:
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
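+    // Expand into a constant-pool load followed by a tPICADD that applies the
+    // PIC label.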
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+    // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::VMOVQQ: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+ unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+ unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
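+    // Copy each Q half with a VORRq (vorr with identical source operands acts
+    // as a register move).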
+ MachineInstrBuilder Even =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(EvenDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ MachineInstrBuilder Odd =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(OddDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ TransferImpOps(MI, Even, Odd);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLDMQIA: {
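+    // A Q-register load-multiple is expanded to a D-register VLDMDIA that
+    // defines both D subregisters (plus an implicit def of the Q register).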
+ unsigned NewOpc = ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA: {
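+    // Likewise, a Q-register store-multiple becomes a D-register VSTMDIA over
+    // both D subregisters.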
+ unsigned NewOpc = ARM::VSTMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+  case ARM::VDUPfdf: {
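+    // VDUPfdf/VDUPfqf duplicate an S register: find its containing D register
+    // and lane, then emit VDUPLN32d/VDUPLN32q from that lane.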
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+ ARM::VDUPLN32d;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+ &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
+
+ case ARM::VST1q8Pseudo:
+ case ARM::VST1q16Pseudo:
+ case ARM::VST1q32Pseudo:
+ case ARM::VST1q64Pseudo:
+ case ARM::VST1q8Pseudo_UPD:
+ case ARM::VST1q16Pseudo_UPD:
+ case ARM::VST1q32Pseudo_UPD:
+ case ARM::VST1q64Pseudo_UPD:
+ case ARM::VST2d8Pseudo:
+ case ARM::VST2d16Pseudo:
+ case ARM::VST2d32Pseudo:
+ case ARM::VST2q8Pseudo:
+ case ARM::VST2q16Pseudo:
+ case ARM::VST2q32Pseudo:
+ case ARM::VST2d8Pseudo_UPD:
+ case ARM::VST2d16Pseudo_UPD:
+ case ARM::VST2d32Pseudo_UPD:
+ case ARM::VST2q8Pseudo_UPD:
+ case ARM::VST2q16Pseudo_UPD:
+ case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST3d8Pseudo:
+ case ARM::VST3d16Pseudo:
+ case ARM::VST3d32Pseudo:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST3d8Pseudo_UPD:
+ case ARM::VST3d16Pseudo_UPD:
+ case ARM::VST3d32Pseudo_UPD:
+ case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST3q8Pseudo_UPD:
+ case ARM::VST3q16Pseudo_UPD:
+ case ARM::VST3q32Pseudo_UPD:
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
+ case ARM::VST3q8oddPseudo_UPD:
+ case ARM::VST3q16oddPseudo_UPD:
+ case ARM::VST3q32oddPseudo_UPD:
+ case ARM::VST4d8Pseudo:
+ case ARM::VST4d16Pseudo:
+ case ARM::VST4d32Pseudo:
+ case ARM::VST1d64QPseudo:
+ case ARM::VST4d8Pseudo_UPD:
+ case ARM::VST4d16Pseudo_UPD:
+ case ARM::VST4d32Pseudo_UPD:
+ case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST4q8Pseudo_UPD:
+ case ARM::VST4q16Pseudo_UPD:
+ case ARM::VST4q32Pseudo_UPD:
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
+ case ARM::VST4q8oddPseudo_UPD:
+ case ARM::VST4q16oddPseudo_UPD:
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..f469d7efe11a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,2099 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb;
+ LLVMContext *Context;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectIntCast(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool isARMNEONPred(const MachineInstr *MI);
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets *CPSR to true when the optional def is CPSR
+// rather than CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+  // If this is a Thumb2 function or the instruction is not in the NEON
+  // domain, it was already handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
+// If the machine instruction is predicable, go ahead and add the predicate
+// operands; if it needs default CC operands, add those as well.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for a description of why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+  // Do we use a predicate? Or is this a NEON instruction in ARM mode that has
+  // a predicate operand even though it is not marked predicable? If so, add
+  // the default predicate anyway.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate? The optional def is CPSR when the
+  // instruction sets the flags; all other optional defs in ARM are the CCR
+  // register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
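+  // If the instruction description has no explicit defs, the result appears
+  // in an implicit-def physical register; emit the instruction and then copy
+  // that register into the virtual ResultReg. The same pattern repeats in the
+  // other FastEmitInst_* helpers below.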
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// Materialize a floating-point constant, either directly with a VFP3 FCONST
+// instruction or by loading it from the constant pool.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+  // Check whether we can use VFP3 instructions to materialize the constant;
+  // otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addFPImm(CFP));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT != MVT::i32) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
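+  // A value that fits in 16 bits can be materialized with a single movw
+  // (MOVi16 / t2MOVi16) on v6T2 and later.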
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+ unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), DestReg)
+ .addImm(CI->getSExtValue()));
+ return DestReg;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
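+  // When PIC relocation is used, record a PC adjustment in the constant-pool
+  // value: 4 bytes in Thumb mode, 8 in ARM mode (the apparent PC offset).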
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+ ARMCP::CPValue, PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Addr.Base.Reg = Tmp;
+ return true;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If the address is based on a frame index and the offset needs to be
+ // simplified, put the frame address into a register, set the base type
+ // back to register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags) {
+ // addrmode5 output depends on the SelectionDAG addressing code dividing the
+ // offset by 4 (it is multiplied back later). Do the same division here.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ }
+ AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass);
+ unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0) return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ // TODO: Factor this out.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ MVT SourceVT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())
+ && isTypeLegal(Ty, SourceVT)) {
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ switch (SourceVT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ break;
+ }
+
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We mustn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ // FIXME: Handle sign-extension where necessary.
+ if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ const Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return false;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss, go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ const Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ }
+ }
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
+// A quick function that will emit a call for the named libcall Call, with the
+// arguments taken from the Instruction I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ const Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if (isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call, BLr9 for darwin, BL otherwise.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc = ARMSelectCallOp(GV);
+ if (isThumb)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
+ else
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectIntCast(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers. The high bits for a type smaller than
+ // the register size are assumed to be undefined.
+ const Type *DestTy = I->getType();
+ Value *Op = I->getOperand(0);
+ const Type *SrcTy = Op->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ if (isa<TruncInst>(I)) {
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+ }
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned Opc;
+ bool isZext = isa<ZExtInst>(I);
+ bool isBoolZext = false;
+ if (!SrcVT.isSimple())
+ return false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i16:
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
+ else
+ Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case MVT::i8:
+ if (isZext)
+ Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
+ else
+ Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case MVT::i1:
+ if (isZext) {
+ Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ isBoolZext = true;
+ break;
+ }
+ return false;
+ }
+
+ // FIXME: We could save an instruction in many cases by special-casing
+ // load instructions.
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+ if (isBoolZext)
+ MIB.addImm(1);
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntCast(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin only for now; Thumb1-only targets are not supported.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
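For context, a hedged editorial sketch (not part of this patch) of how the factory above is assumed to be reached: the generic instruction selector asks the target for a FastISel instance through a virtual createFastISel hook on TargetLowering, whose ARM override (in ARMISelLowering.cpp) simply forwards to ARM::createFastISel.

    // Illustrative sketch only -- assumed override in ARMISelLowering.cpp.
    // Returning 0 here makes the selector fall back to the SelectionDAG path.
    FastISel *
    ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
      return ARM::createFastISel(funcInfo);
    }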
diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
new file mode 100644
index 000000000000..350c92decdce
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for unconditional
+ // branch instructions.
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // conditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The following fixups are for the movt/movw pair;
+ // the 16-bit imm field is split into imm{15-12} and imm{11-0}.
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the above two.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
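For orientation, a hedged sketch (not part of this patch) of how these fixup kinds are typically consumed: the ARM asm backend is assumed to describe each value, indexed by Kind - FirstTargetFixupKind, with an MCFixupKindInfo entry giving the fixup's name, bit offset, bit width, and PC-relative flag. The offsets and widths below are placeholders, not the real encodings.

    #include "ARMFixupKinds.h"
    #include "llvm/MC/MCFixupKindInfo.h"
    using namespace llvm;

    // Hypothetical excerpt of a fixup-info table; names mirror ARM::Fixups,
    // but the offset/size columns are illustrative only.
    static const MCFixupKindInfo ARMFixupInfos[] = {
      // name                      offset  bits  flags
      { "fixup_arm_ldst_pcrel_12",      0,   32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_t2_ldst_pcrel_12",       0,   32, MCFixupKindInfo::FKF_IsPCRel },
      // ... one entry per ARM::Fixups value, in declaration order ...
    };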
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 000000000000..381b404519e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1086 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetDarwin())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has a small immediate offset range for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ ARMCC::AL, 0, TII, MIFlags);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since the vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ if (RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It would be better just to find a spare register here.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
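A quick worked example of the spill-area bookkeeping above, with illustrative numbers only: if the frame is NumBytes = 40 with GPRCS1Size = 16, GPRCS2Size = 8 and DPRCSSize = 8, then DPRCSOffset = 40 - (16 + 8 + 8) = 8, GPRCS2Offset = 8 + 8 = 16 and GPRCS1Offset = 16 + 8 = 24; NumBytes is then reset to DPRCSOffset, so the final emitSPUpdate allocates the remaining 8 bytes of locals below the callee-saved areas.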
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad: if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since a vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
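+ // Transfer any remaining operands from the TCRETURN pseudo to the newly
+ // created tail-jump instruction.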
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
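+ // Default to SP-relative addressing; the cases below may switch to the
+ // frame pointer or the base pointer instead.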
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use the frame pointer to reference fixed objects. Also use it for locals
+ // if there are VLAs but no base pointer (the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() &&
+ !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ } else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out-of-range references: ldr <rt>, [<rn>, #-<imm8>]
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
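+/// emitPushInst - Emit a store-multiple (or a single pre-indexed store) to
+/// spill the callee-saved registers accepted by the Func predicate.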
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
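+ // Walk the callee-saved registers from the end of the CSI list, grouping
+ // the ones accepted by Func so they can be pushed with one instruction.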
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP).setMIFlags(MIFlags));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP).setMIFlags(MIFlags);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (StrOpc == ARM::STR_PRE) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
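+/// emitPopInst - Emit a load-multiple (or a single post-indexed load) to
+/// reload the callee-saved registers accepted by the Func predicate, folding
+/// the return into the LDM when it is safe to pop directly into PC.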
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri ||
+ RetOpcode == ARM::TCRETURNriND);
+
+ SmallVector<unsigned, 4> Regs;
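+ // As in emitPushInst, walk the CSI list backwards and group consecutive
+ // registers so they can be popped with a single instruction.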
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet)
+ MI->eraseFromParent();
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
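+ // Spill GPR callee-saved area 1, then area 2, then the DPR (VFP) area.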
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ MachineInstr::FrameSetup);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
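+ // Restore in reverse order: the DPR (VFP) area first, then GPR area 2,
+ // then GPR area 1.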
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register);
+
+ return true;
+}
+
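+/// GetFunctionSizeInBytes - Return the code size of the function by summing
+/// the encoded size of each machine instruction.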
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Addressing modes 4 & 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if a Thumb2 function requires stack realignment - it will be used
+ // as a scratch register. Also spill R4 if the function has variable-sized objects,
+ // since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It would be better just to find a spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
+ // FIXME: It would be better just to find a spare register here.
+ unsigned StackSize = estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases =
+ RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is >= 2048. This enables
+ // the use of BL to implement a far jump. If it turns out that it's not needed
+ // then the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, or R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If stack and double are 8-byte aligned and we are spilling an odd number
+ // of GPRs, spill one extra callee save GPR so we won't have to pad between
+ // the integer and double callee save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 000000000000..61bb8afa40f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,76 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM-specific implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags = 0) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..8d77b2d8383e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,219 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals that were merged into the single big one can be addressed using
+// offsets from the same base pointer (there is no need for a separate base
+// pointer for each global). Such a transformation can significantly reduce
+// the register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of the 3 arrays must be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+namespace {
+ class ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = I->getAlignment();
+ const Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to the way the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 000000000000..787f6a279187
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,120 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause a 4-cycle stall.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastMCID.isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ITBlockSize = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 000000000000..2bc218d8566b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2c9481b86c55
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,3011 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMAddressingModes.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(true));
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ /// getI32Imm - Return a target constant of type i32 with the specified
+ /// value.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ SDNode *Select(SDNode *N);
+
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
+ bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectShifterOperandReg(N, A, B, C, false);
+ }
+
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
+ bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm);
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm);
+
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+ /// ARM.
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
+
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// loads of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+ /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
+
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
+ /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
+ SDNode *SelectConcatVector(SDNode *N);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the value is a 32-bit
+// constant operand. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc &&
+ isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where N is in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
+
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA /
+/// MLS node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards
+/// (at least on current ARM implementations) which should be avoided.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
+ return true;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isCortexA9())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
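+
+// Illustrative mapping for SelectAddrModeImm12 above: (add R1, #42) yields
+// Base = R1 and OffImm = 42, which the LDRi12-style load/store patterns
+// consume directly. Offsets outside [0, 4096) fall through to the
+// "base only" case, so the add is materialized separately and the access
+// uses a zero immediate.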
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse())
+ // Compute R +/- (R << N) and reuse it.
+ return false;
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
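+
+// Illustrative mappings for SelectLdStSOReg above: (add R1, (shl R2, 2))
+// becomes Base = R1, Offset = R2 with an "add, lsl #2" AM2 opcode, e.g. the
+// register-offset form "ldr r0, [r1, r2, lsl #2]". The ISD::MUL special
+// case turns (mul X, 5) into Base = Offset = X with an "add, lsl #2"
+// opcode, exploiting X * 5 == X + (X << 2).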
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return AM2_SHOP;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalized to X + -C; no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
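+
+// Illustrative mapping for SelectAddrMode5 above: addressing mode 5 encodes
+// the offset in words, so (add R1, #1020) produces Base = R1 and an AM5
+// opcode of (add, 255), the form used by VFP loads/stores such as
+// "vldr d0, [r1, #1020]". Offsets that are not multiples of 4 fail
+// isScaledConstantInRange and fall back to the base-only form.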
+
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
+ Addr = N;
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign > MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
+ SDValue &Base, SDValue &Offset){
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
+ ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
+ if (!NC || !NC->isNullValue())
+ return false;
+
+ Base = Offset = N;
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else {
+ Base = N;
+ }
+
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
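+
+// Illustrative mapping for SelectThumbAddrModeSP above: for an SP-relative
+// access such as (add SP, #16), the Scale = 4 division leaves OffImm = 4
+// (the offset in words), feeding the tLDRspi / tSTRspi forms mentioned
+// above. If the base is neither SP nor a frame index, or the offset is not
+// a word multiple the encoding can hold, the matcher returns false.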
+
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
+ SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match the base-register-only case. That is matched to a separate,
+ // lower-complexity pattern with an explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select t2LDRpci instead.
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
+ // Let t2LDRi8 handle (R - imm8).
+ return false;
+
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ // Match simple R - imm8 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
+ SDValue &OffImm){
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
+ SDValue &Base,
+ SDValue &OffReg, SDValue &ShImm) {
+ // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
+ return false;
+ else if (RHSC < 0 && RHSC >= -255) // 8 bits
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R + (R << [1,2,3]) and reuse it.
+ Base = N;
+ return false;
+ }
+
+ // Look for (R + R) or (R + (R << [1,2,3])).
+ unsigned ShAmt = 0;
+ Base = N.getOperand(0);
+ OffReg = N.getOperand(1);
+
+ // Swap if it is ((R << c) + R).
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+ if (ShOpcVal != ARM_AM::lsl) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+ if (ShOpcVal == ARM_AM::lsl)
+ std::swap(Base, OffReg);
+ }
+
+ if (ShOpcVal == ARM_AM::lsl) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+ return true;
+}
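+
+// Illustrative mapping for SelectT2AddrModeSoReg above: (add R1, (shl R2, 2))
+// becomes Base = R1, OffReg = R2, ShImm = 2, e.g. the Thumb-2
+// register-offset form "ldr.w r0, [r1, r2, lsl #2]". Only lsl amounts of
+// 0-3 can be encoded, so larger or non-lsl shifts are left inside OffReg
+// and ShImm stays 0.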
+
+//===--------------------------------------------------------------------===//
+
+/// getAL - Returns an ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+
+ return NULL;
+}
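+
+// Illustrative case for SelectARMIndexedLoad above: a post-increment i32
+// load whose offset is the constant 4 matches the immediate path of
+// SelectAddrMode2Offset and is emitted as LDR_POST; the machine node
+// produces the loaded value, the updated base register, and the chain,
+// matching the three result types passed to getMachineNode.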
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return NULL;
+
+ EVT LoadedVT = LD->getMemoryVT();
+ bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ SDValue Offset;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+ break;
+ case MVT::i16:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+ break;
+ case MVT::i8:
+ case MVT::i1:
+ if (isSExtLd)
+ Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+ else
+ Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+ break;
+ default:
+ return NULL;
+ }
+ Match = true;
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ MVT::Other, Ops, 5);
+ }
+
+ return NULL;
+}
+
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+}
+
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
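+
+// Worked example (illustrative) for GetVLDSTAlign above: a VLD2 of 128-bit
+// vectors covers four D registers (NumRegs = 4), so an incoming alignment
+// of 64 bytes is clamped to 32, while anything below 8 bytes is dropped to
+// 0 and no alignment is encoded.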
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
+ }
+
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ } else {
+ // Otherwise, quad registers are loaded with two separate instructions,
+ // where one loads the even registers and the other loads the odd registers.
+ EVT AddrTy = MemAddr.getValueType();
+
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
+
+ // Load the odd subregs.
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SmallVector<SDValue, 7> Ops;
+
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
+ if (NumVecs == 1) {
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // Form a QQ register.
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(SrcReg);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VSt =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld/vst lane type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ // Quad-register operations:
+ case MVT::v8i16: OpcodeIndex = 0; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 1; break;
+ }
+
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+ Ops.push_back(SuperReg);
+ Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
+ bool isSigned) {
+ if (!Subtarget->hasV6T2Ops())
+ return NULL;
+
+ unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+
+ // For unsigned extracts, check for a shift right and mask
+ unsigned And_imm = 0;
+ if (N->getOpcode() == ISD::AND) {
+ if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
+ if (And_imm & (And_imm + 1))
+ return NULL;
+
+ unsigned Srl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+
+ unsigned Width = CountTrailingOnes_32(And_imm);
+ unsigned LSB = Srl_imm;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+ }
+
+ // Otherwise, we're looking for a shift of a shift
+ unsigned Shl_imm = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
+ unsigned Srl_imm = 0;
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ unsigned Width = 32 - Srl_imm;
+ int LSB = Srl_imm - Shl_imm;
+ if (LSB < 0)
+ return NULL;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ return NULL;
+}
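+
+// Worked examples (illustrative) for SelectV6T2BitfieldExtractOp above:
+// (and (srl X, 8), 0xff) extracts bits [15:8] and becomes UBFX X, #8, #8
+// (LSB = 8, Width = 8), while (sra (shl X, 16), 24) sign-extends bits
+// [15:8] and becomes SBFX X, #8, #8. A mask that is not a run of low bits,
+// or a negative LSB, bails out so the generic patterns can handle the node.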
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also VMOVScc and VMOVDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
+}
+
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
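+ // For example, concatenating two v2f32 values into a v4f32: PairDRegs
+ // builds a REG_SEQUENCE that places the two D registers into one Q register.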
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+}
+
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->hasThumb2())
+ // Thumb2-aware targets have the MOVT instruction, so all immediates can
+ // be done with MOV + MOVT, at worst.
+ UseCP = false;
+ else {
+ if (Subtarget->isThumb()) {
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ } else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ }
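+ // Illustrative values: on pre-v6T2 ARM, 0x00ab0000 is a rotated 8-bit
+ // immediate and needs no pool, while 0x12345678 has four non-zero bytes
+ // and is loaded from the constant pool below. On Thumb1, 0x1fe00
+ // (0xff << 9) is a shifted 8-bit value and also avoids the pool.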
+
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb1Only()) {
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
+ Ops, 4);
+ } else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 5);
+ }
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb1Only()) {
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ } else {
+ unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
+ ARM::t2ADDri : ARM::ADDri);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SRL:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+ break;
+ case ISD::SRA:
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
+ return I;
+ break;
+ case ISD::MUL:
+ if (Subtarget->isThumb1Only())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
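+ // Strength-reduce multiplication by 2^n+1 or 2^n-1 into a shifted add/rsb.
+ // Illustrative examples: x*9 = x + (x << 3) -> add r, x, x, lsl #3 and
+ // x*7 = (x << 3) - x -> rsb r, x, x, lsl #3.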
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ unsigned ShImm = Log2_32(RHSV-1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ unsigned ShImm = Log2_32(RHSV+1);
+ if (ShImm >= 32)
+ break;
+ SDValue V = N->getOperand(0);
+ ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check for unsigned bitfield extract
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
+ return I;
+
+ // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
+ // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the
+ // top 16 bits of the result come entirely from c2 and the lower 16 bits
+ // entirely from x. That is equal to (or (and x, 0xffff), (and c2, 0xffff0000)).
+ // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
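+ // Illustrative example: c1 = 0xabcdffff, c2 = 0xabcd0000 gives
+ // (x | 0xabcd0000) & 0xabcdffff == (x & 0xffff) | 0xabcd0000,
+ // which is exactly "movt x, #0xabcd".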
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ break;
+ unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ? ARM::t2MOVTi16
+ : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
+ if (!Opc)
+ break;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ break;
+ if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
+ SDValue N2 = N0.getOperand(1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C)
+ break;
+ unsigned N1CVal = N1C->getZExtValue();
+ unsigned N2CVal = N2C->getZExtValue();
+ if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
+ (N1CVal & 0xffffU) == 0xffffU &&
+ (N2CVal & 0xffffU) == 0x0U) {
+ SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
+ MVT::i32);
+ SDValue Ops[] = { N0.getOperand(0), Imm16,
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ }
+ }
+ break;
+ }
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::SMUL_LOHI: {
+ if (Subtarget->isThumb1Only())
+ break;
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::LOAD: {
+ SDNode *ResNode = 0;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ ResNode = SelectT2IndexedLoad(N);
+ else
+ ResNode = SelectARMIndexedLoad(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ?
+ ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ MVT::Glue, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (N->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InFlag);
+ }
+ ReplaceUses(SDValue(N, 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(N);
+ case ARMISD::VZIP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4i16: Opc = ARM::VZIPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8i16: Opc = ARM::VZIPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VZIPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VUZP: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4i16: Opc = ARM::VUZPd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8i16: Opc = ARM::VUZPq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VUZPq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::VTRN: {
+ unsigned Opc = 0;
+ EVT VT = N->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4i16: Opc = ARM::VTRNd16; break;
+ case MVT::v2f32:
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
+ case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8i16: Opc = ARM::VTRNq16; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = ARM::VTRNq32; break;
+ }
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ }
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
+
+ case ARMISD::VLD2DUP: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+ ARM::VLD2DUPd32Pseudo };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_ldrexd: {
+ SDValue MemAddr = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+
+ unsigned NewOpc = ARM::LDREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2LDREXD;
+
+ // arm_ldrexd returns an i64 value in {i32, i32}.
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
+ // Until there's support for specifying explicit register constraints
+ // (such as an even/odd register pair), hardcode ldrexd to always use
+ // the pair [R0, R1] to hold the load result.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
+ SDValue(Ld, 0), SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
+ SDValue(Ld, 1), Chain.getValue(1));
+
+ // Remap uses.
+ SDValue Glue = Chain.getValue(1);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R0, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 0), Result);
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R1, MVT::i32, Glue);
+ Glue = Result.getValue(2);
+ ReplaceUses(SDValue(N, 1), Result);
+ }
+
+ ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ return NULL;
+ }
+
+ case Intrinsic::arm_strexd: {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Val0 = N->getOperand(2);
+ SDValue Val1 = N->getOperand(3);
+ SDValue MemAddr = N->getOperand(4);
+
+ // Until there's support for specifying explicit register constraints
+ // (such as an even/odd register pair), hardcode strexd to always use
+ // the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
+ Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
+ SDValue(0, 0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
+
+ SDValue Glue = Chain.getValue(1);
+ Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R2, MVT::i32, Glue);
+ Glue = Val0.getValue(1);
+ Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ ARM::R3, MVT::i32, Glue);
+
+ // Store-exclusive double returns an i32 value which is the status of
+ // the issued store.
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
+ // place arguments in the right order
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ Ops.push_back(MemAddr);
+ Ops.push_back(getAL(CurDAG));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(Chain);
+
+ unsigned NewOpc = ARM::STREXD;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2())
+ NewOpc = ARM::t2STREXD;
+
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
+ Ops.size());
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+ }
+
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld2: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vld3: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld4: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vld2lane: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld3lane: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vld4lane: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst2: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case Intrinsic::arm_neon_vst3: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst4: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case Intrinsic::arm_neon_vst2lane: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst3lane: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ }
+
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ }
+ break;
+ }
+
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation: the two VTBL table
+ // entries must end up in consecutive D registers.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
+ }
+
+ return SelectCode(N);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all ARM
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new ARMDAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 000000000000..cf8c5baa8e7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,7978 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+namespace llvm {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C, ParmContext PC)
+ : CCState(CC, isVarArg, MF, TM, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+ EVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+
+ EVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ if (ElemTy != MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT.getSimpleVT(),
+ (MVT::SimpleValueType)InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::OR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+ AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+ PromotedBitwiseVT.getSimpleVT());
+ }
+
+ // Neon does not support vector divide/remainder operations.
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::DPRRegisterClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+ addRegisterClass(VT, ARM::QPRRegisterClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Use VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isAAPCS_ABI()) {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
+ setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
+ setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ }
+
+ if (Subtarget->isThumb1Only())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+ // neither Neon nor VFP supports any arithmetic operations on it.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
+ // Neon does not support some operations on v1i64 and v2i64 types.
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+ }
+
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ if (!Subtarget->isThumb1Only()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+ }
+
+ // i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
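+ // So on AAPCS targets without a hardware divider, an i32 sdiv should end
+ // up as a call to __aeabi_idiv (registered above); other targets fall back
+ // to the default __divsi3 libcall.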
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+    // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
+    // iff the target supports VFP2.
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ }
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Various VFP goodness
+ if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (Subtarget->hasVFP2()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+ // Special handling for half-precision FP.
+ if (!Subtarget->hasFP16()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ }
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
+
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::OR);
+ if (Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::AND);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ benefitFromCodePlacementOpt = true;
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+}
+
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of its super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
+  // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPZ: return "ARMISD::CMPZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+
+ case ARMISD::RBIT: return "ARMISD::RBIT";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+
+ case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
+ case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
+
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VDUP: return "ARMISD::VDUP";
+ case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
+ case ARMISD::VEXT: return "ARMISD::VEXT";
+ case ARMISD::VREV64: return "ARMISD::VREV64";
+ case ARMISD::VREV32: return "ARMISD::VREV32";
+ case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VZIP: return "ARMISD::VZIP";
+ case ARMISD::VUZP: return "ARMISD::VUZP";
+ case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ }
+}
+
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
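+// Pick a per-node scheduling preference: favor latency scheduling for nodes
+// that produce floating-point or vector values or whose first result takes
+// more than two cycles; otherwise schedule to reduce register pressure.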
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+  // Loads are scheduled for latency even if the instruction itinerary
+  // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+
+ if (MCID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
+ return Sched::Latency;
+
+ return Sched::RegPressure;
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::LS; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
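+// ARMGenCallingConv.inc is generated by TableGen from ARMCallingConv.td and
+// defines the CC_ARM_* and RetCC_ARM_* assignment functions referenced below.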
+#include "ARMGenCallingConv.inc"
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
+ // Use target triple & subtarget features to do actual dispatch.
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ }
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallResult(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ true,
+ isVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 or half of a v2f64.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
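+/// PassF64ArgInRegs - Split an f64 argument into two i32 halves with
+/// ARMISD::VMOVRRD and pass them in their assigned locations; the second half
+/// is stored to the stack when its location is a memory slot.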
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const {
+
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ dl, DAG, NextVA,
+ Flags));
+ }
+}
+
+/// LowerCall - Lower a call into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ RegsToPassVector RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+ dl, DAG, VA, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned int i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/false,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
+
+ } else if (!IsSibCall) {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+  // Tail call byval lowering might overwrite argument registers, so in the
+  // case of tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (EnableARMLongCalls) {
+ assert (getTargetMachine().getRelocationModel() == Reloc::Static
+ && "long-calls with non-static relocation model!");
+ // Handle a global address or an external symbol. If it's not one of
+ // those, the target's already in a register, so we don't need to do
+ // anything extra.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
+ // Create a constant pool entry for the callee address
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 0);
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
+ ARMPCLabelIndex,
+ ARMCP::CPValue, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ Sym, ARMPCLabelIndex, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+}
+
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to ensure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+  // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
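+/// LowerReturn - Lower outgoing return values into the physical registers
+/// required by the calling convention, splitting f64 and v2f64 values into
+/// i32 pairs via ARMISD::VMOVRRD where needed.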
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+ isVarArg));
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+    // Guarantee that all emitted copies are stuck together by chaining them
+    // through the flag value, so nothing else is scheduled between them.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
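+// Return true if the value produced by N is consumed only by a return: either
+// directly through CopyToReg nodes feeding ARMISD::RET_FLAG, via VMOVRRD for
+// an f64 split into a GPR pair, or via a bitcast for an f32 returned in a GPR.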
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+
+ bool HasRet = false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+ }
+
+ return HasRet;
+}
+
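+// A call marked 'tail' may be emitted as a tail call only when ARM tail calls
+// are enabled and the target is not Thumb1.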
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
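+// ARM emits its jump tables inline in the function body rather than in a
+// separate jump-table section.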
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
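+// Lower a BlockAddress node by materializing its address from the constant
+// pool, adding a PIC base when the relocation model is not static.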
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ } else {
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress,
+ PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ if (RelocM == Reloc::Static)
+ return Result;
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = GA->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ // Initial exec model.
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
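+// Lower a global address for ELF targets. For PIC code, load the address via
+// a GOT or GOTOFF constant-pool entry; otherwise use a movw/movt pair when
+// the subtarget supports it, or fall back to a constant-pool load.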
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ }
+}
+
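+// Lower a global address for Darwin targets: prefer a movw/movt pair when the
+// subtarget supports it (currently only for non-static codegen, see the FIXME
+// below); otherwise go through a constant-pool entry, adding a PIC base and a
+// non-lazy-pointer load when required.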
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // FIXME: Enable this for static codegen when tool issues are fixed.
+ if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static) {
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ } else {
+ ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+ "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer: {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ SDValue CPAddr;
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ return Result;
+ }
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+    // Some ARMv6 CPUs can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
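+  // Pick the DMB option: SY/ST order accesses across the full system, while
+  // ISH/ISHST are limited to the inner shareable domain; the *ST variants
+  // order stores only.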
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+  // ARM pre v5TE and Thumb1 do not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
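+  // The prefetch node's read/write operand is 1 for a write, so invert it to
+  // get an "is read" flag for ARMISD::PRELOAD.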
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ isData = ~isData & 1;
+ }
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
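+  // Combine the two i32 halves into a single f64 value with VMOVDRR.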
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+    unsigned firstUnalloced =
+      CCInfo.getFirstUnallocated(GPRArgRegs,
+                                 sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
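+  // Round the register save area up to the stack alignment; Align is a power
+  // of two, so the usual add-and-mask rounding applies.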
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usually
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ CCInfo.AnalyzeFormalArguments(Ins,
+ CCAssignFnForNode(CallConv, /* Return*/ false,
+ isVarArg));
+
+ SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+
+ } else {
+ TargetRegisterClass *RC;
+
+ if (RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ else if (RegVT == MVT::v2f64)
+ RC = ARM::QPRRegisterClass;
+ else if (RegVT == MVT::i32)
+ RC = (AFI->isThumb1OnlyFunction() ?
+ ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+          // In case of tail call optimization, mark all arguments mutable,
+          // since they could be overwritten by lowering of arguments in case
+          // of a tail call.
+ if (Flags.isByVal()) {
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+ unsigned Bytes = Flags.getByValSize() - VARegSize;
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes,
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+
+ // varargs
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
+
+ return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+/// Returns the appropriate ARM CMP (cmp) and the corresponding condition code
+/// for the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
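+      // E.g. in ARM mode 0x101 is not a legal immediate but 0x100 is, so
+      // (x < 0x101) is rewritten below as (x <= 0x100).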
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ // Uses only Z Flag
+ CompareType = ARMISD::CMPZ;
+ break;
+ }
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Op.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMcc, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operands is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ FPCCToARMCC(CC, CondCode, CondCode2);
+
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ if (Subtarget->isThumb2()) {
+ // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ // which does another jump to the destination. This also makes it easier
+ // to translate it to TBB / TBH later.
+ // FIXME: This might not work if the function is extremely large.
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
+ Addr, Op.getOperand(2), JTI, UId);
+ }
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(),
+ false, false, 0);
+ Chain = Addr.getValue(1);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ } else {
+ Addr = DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), false, false, 0);
+ Chain = Addr.getValue(1);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ }
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::FP_TO_SINT:
+ Opc = ARMISD::FTOSI;
+ break;
+ case ISD::FP_TO_UINT:
+ Opc = ARMISD::FTOUI;
+ break;
+ }
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT OperandVT = Op.getOperand(0).getValueType();
+ assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ switch (Op.getOpcode()) {
+ default:
+    llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ Opc = ARMISD::SITOF;
+ break;
+ case ISD::UINT_TO_FP:
+ Opc = ARMISD::UITOF;
+ break;
+ }
+
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Tmp1.getValueType();
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
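+    // createNEONModImm(0x6, 0x80) is the modified-immediate encoding of the
+    // i32 splat 0x80000000, i.e. the sign bit of each f32 lane (for f64 it
+    // is shifted into the high word below).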
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// ExpandBITCAST - If the target supports VFP, this function is called to
+/// expand a bit convert where either the source or destination type is i64 to
+/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
+/// operand type is illegal (e.g., v2f32 for a target that doesn't support
+/// vectors), since the legalizer won't know what to do with that.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+
+ // This function is only supposed to be called for i64 types, either as the
+ // source or destination of the bit convert.
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
+ "ExpandBITCAST called for non-i64 type");
+
+ // Turn i64->f64 into VMOVDRR.
+ if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
+ }
+
+ // Turn f64->i64 into VMOVRRD.
+ if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+ }
+
+ return SDValue();
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
+/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
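+  // For ShAmt < VTBits the low result is
+  // (ShOpLo >> ShAmt) | (ShOpHi << (VTBits - ShAmt)); for ShAmt >= VTBits it
+  // is ShOpHi shifted by ShAmt - VTBits. The CMOV below selects between them.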
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
+/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue ARMcc;
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
+ ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
+ CCR, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and the AND get folded into a bitfield extract.
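+  // For example, RMode = 0b01 (round towards plus infinity) contributes
+  // 1 << 22, and ((1 << 22) + (1 << 22)) >> 22 = 2, FLT_ROUNDS' "to +inf".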
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!ST->hasV6T2Ops())
+ return SDValue();
+
+ SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Lower vector shifts on NEON to use VSHL.
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // We can get here for a node like i32 = ISD::SHL i32, i64
+ if (VT != MVT::i64)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb1Only()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+  // captures the shifted-out bit in the carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue TmpOp0, TmpOp1;
+ bool Invert = false;
+ bool Swap = false;
+ unsigned Opc = 0;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal FP comparison"); break;
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETOEQ:
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETOLT:
+ case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETOLE:
+ case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+ case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+ case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETONE:
+ // Expand this to (OLT | OGT).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ break;
+ case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETO:
+ // Expand this to (OLT | OGE).
+ TmpOp0 = Op0;
+ TmpOp1 = Op1;
+ Opc = ISD::OR;
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ break;
+ }
+ } else {
+ // Integer comparisons.
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Illegal integer comparison"); break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = ARMISD::VCGT; break;
+ case ISD::SETLE: Swap = true;
+ case ISD::SETGE: Opc = ARMISD::VCGE; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+ }
+
+ // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+ if (Opc == ARMISD::VCEQ) {
+
+ SDValue AndOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ AndOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+ AndOp = Op1;
+
+ // Ignore bitconvert.
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+ AndOp = AndOp.getOperand(0);
+
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+ Opc = ARMISD::VTST;
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Invert = !Invert;
+ }
+ }
+ }
+
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return the encoded value.
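+/// For example, a 32-bit splat of 0x0000ab00 is encoded as Cmode=001x with
+/// Imm=0xab (the "Value = 0x0000nn00" case below).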
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ EVT &VT, bool is128Bits, NEONModImmType type) {
+ unsigned OpCmode, Imm;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+  // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
+ switch (SplatBitSize) {
+ case 8:
+ if (type != VMOVModImm)
+ return SDValue();
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
+ assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ break;
+
+ case 16:
+ // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
+
+ case 32:
+ // NEON's 32-bit VMOV supports splat values where:
+ // * only one byte is nonzero, or
+ // * the least significant byte is 0xff and the second byte is nonzero, or
+ // * the least significant 2 bytes are 0xff and the third is nonzero.
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
+
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
+ if ((SplatBits & ~0xffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
+
+ if ((SplatBits & ~0xffffff) == 0 &&
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
+
+ // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+ // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+ // VMOV.I32. A (very) minor optimization would be to replicate the value
+ // and fall through here to test for a valid 64-bit splat. But, then the
+ // caller would also need to check and handle the change in size.
+ return SDValue();
+
+ case 64: {
+ if (type != VMOVModImm)
+ return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ uint64_t BitMask = 0xff;
+ uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
+ for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+ Val |= BitMask;
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
+ return SDValue();
+ }
+ BitMask <<= 8;
+ ImmMask <<= 1;
+ }
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+ break;
+ }
+
+ default:
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
+ }
+
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
+
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+ bool &ReverseVEXT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
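+  // E.g. for v8i8, the mask <1, 2, 3, 4, 5, 6, 7, 8> takes elements 1-7 of
+  // the first source and element 0 of the second, i.e. VEXT with Imm = 1.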
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, it may still be
+ // a VEXT but the source vectors must be swapped.
+ ExpectedElt += 1;
+ if (ExpectedElt == NumElts * 2) {
+ ExpectedElt = 0;
+ ReverseVEXT = true;
+ }
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ // Adjust the index value if the source operands will be swapped.
+ if (ReverseVEXT)
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
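+/// For example, a v4i16 shuffle with mask <3, 2, 1, 0> is VREV64.16: one
+/// 64-bit block with its 16-bit elements reversed.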
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned BlockSize) {
+ assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+ "Only possible block sizes for VREV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
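+// For example, for v4i32 VTRN.32 gives result 0 = <0, 4, 2, 6> and
+// result 1 = <1, 5, 3, 7> in shuffle-mask terms.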
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
+
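+// For example, for v4i32 VUZP.32 gives result 0 = <0, 2, 4, 6> and
+// result 1 = <1, 3, 5, 7> in shuffle-mask terms.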
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
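+/// isVZIPMask - Check if a vector shuffle corresponds to one result of a
+/// VZIP (vector interleave); e.g., for v4i16 the two results use the masks
+/// <0, 4, 1, 5> and <2, 6, 3, 7>.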
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ // Check if an immediate VMOV works.
+ EVT VmovVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+ ((1LL << SplatBitSize) - 1));
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ }
+ }
+ }
+
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ return SDValue();
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently we only do something sane when at most two source vectors are
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // The span is too large for a VEXT to cope with.
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
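+ // Each mask entry is a base-9 digit (0-7 for a lane of the two inputs, 8
+ // for undef); e.g., the mask <1, 0, 3, 2> maps to 1*729 + 0*81 + 3*9 + 2.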
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm, WhichResult;
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16) ||
+ isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isVTRNMask(M, VT, WhichResult) ||
+ isVUZPMask(M, VT, WhichResult) ||
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
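+ // A perfect-shuffle entry packs: bits [31:30] the cost (already checked by
+ // the caller), [29:26] the operation, [25:13] LHSID and [12:0] RHSID, which
+ // are expanded recursively below.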
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3:
+ return DAG.getNode(ARMISD::VEXT, dl, VT,
+ OpLHS, OpRHS,
+ DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ case OP_VUZPL:
+ case OP_VUZPR:
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+ case OP_VZIPL:
+ case OP_VZIPR:
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+ case OP_VTRNL:
+ case OP_VTRNR:
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+ }
+}
+
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SmallVector<int, 8> ShuffleMask;
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ // FIXME: floating-point vectors should be canonicalized to integer vectors
+ // of the same size so that they get CSEd properly.
+ SVN->getMask(ShuffleMask);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is an undef splat, treat it as a splat of lane 0 so it can be
+ // generated with a plain VDUP, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
+ }
+
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
+ }
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = DAG.getUNDEF(MVT::v2f64);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+ DAG.getIntPtrConstant(0));
+ if (Op1.getOpcode() != ISD::UNDEF)
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
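+/// For example, a v4i16 BUILD_VECTOR of <0, 1, 2, 255> counts as
+/// zero-extended from i8, and <0, -1, 2, -128> as sign-extended from i8.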
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ int64_t SExtVal = C->getSExtValue();
+ if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ return false;
+ } else {
+ if ((C->getZExtValue() >> HalfSize) != 0)
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
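+/// isAddSubSExt - Check whether N is an ADD or SUB of two sign-extended,
+/// single-use values; LowerMUL uses this to distribute a widening multiply
+/// over the addition or subtraction (see the VMULL handling below).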
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = ARMISD::VMULLs;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0;
+ SDValue Op1 = SkipExtension(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtension(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during isel
+ // lowering to take advantage of no-stall back-to-back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any Newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single Newton step. This requires that we use a weird bias
+ // of 0x89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 udiv: convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
+ Subtarget);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ break;
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
+ break;
+ case ISD::SRL:
+ case ISD::SRA:
+ Res = Expand64BitShift(N, DAG, Subtarget);
+ break;
+ }
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned scratch =
+ MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass);
+
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
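+ // For a swap the incoming value is stored back directly, so no ALU
+ // instruction is emitted inside the ldrex/strex loop.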
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution may be to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADCSri, ARM::ADCri},
+ {ARM::ADCSrr, ARM::ADCrr},
+ {ARM::ADCSrs, ARM::ADCrs},
+ {ARM::SBCSri, ARM::SBCri},
+ {ARM::SBCSrr, ARM::SBCrr},
+ {ARM::SBCSrs, ARM::SBCrs},
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrs, ARM::RSBrs},
+ {ARM::RSCSri, ARM::RSCri},
+ {ARM::RSCSrs, ARM::RSCrs},
+ {ARM::t2ADCSri, ARM::t2ADCri},
+ {ARM::t2ADCSrr, ARM::t2ADCrr},
+ {ARM::t2ADCSrs, ARM::t2ADCrs},
+ {ARM::t2SBCSri, ARM::t2SBCri},
+ {ARM::t2SBCSrr, ARM::t2SBCrr},
+ {ARM::t2SBCSrs, ARM::t2SBCrs},
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert an Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target description as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = 0;
+
+ // This is only called for instructions that need remapping, so iterating over
+ // the tiny opcode table is not costly.
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
+ if (OldOpc == Pair->PseudoOpc) {
+ NewOpc = Pair->MachineOpc;
+ break;
+ }
+ }
+ if (!NewOpc)
+ return false;
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ AddDefaultPred(MIB);
+ MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MI->eraseFromParent();
+ return true;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ switch (MI->getOpcode()) {
+ default: {
+ if (RemapAddSubWithFlags(MI, BB))
+ return BB;
+
+ MI->dump();
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+ case ARM::tMOVCCr_pseudo: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ EVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+  // Only perform the optimization after legalization and if NEON is
+  // available. We also expect both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+  // N0 and N1 are BUILD_VECTOR nodes, each with N EXTRACT_VECTOR_ELT
+  // operands, where N is the number of elements in the formed vector.
+  // Each EXTRACT_VECTOR_ELT should use the same input vector, with even
+  // indices in N0 and odd indices in N1, forming a pair-wise add pattern.
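+  //
+  // Illustrative example (assumed v4i16 input vector V):
+  //   N0 = BUILD_VECTOR(extract_elt(V, 0), extract_elt(V, 2))
+  //   N1 = BUILD_VECTOR(extract_elt(V, 1), extract_elt(V, 3))
+  // so (add N0, N1) sums adjacent element pairs of V and can be formed with a
+  // widening vpaddl of V followed by the truncate emitted below.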
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+  // For each operand of the two BUILD_VECTORs, check that it is an
+  // EXTRACT_VECTOR_ELT of the same vector with the appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+      // The first operand is the vector; verify it is the same one.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+      // The second operand is the constant index; verify it is correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+      // The constant indices must form consecutive even/odd pairs.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
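+  // e.g. (add (select cc, 0, 7), x) becomes (select cc, x, (add x, 7)), so
+  // the add only happens on the path where the selected constant is nonzero.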
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
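+  // Worked example: MulAmt == 40 (0b101000) gives ShiftAmt == 3 and a reduced
+  // MulAmt of 5 == 2^2 + 1, so (mul x, 40) -> (shl (add x, (shl x, 2)), 3).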
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ // Attempt to use immediate-form VBIC
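+  // e.g. (and x, <0xffffff00 splat>) can become a single VBIC with the
+  // inverted splat (0xff) encoded as a NEON modified immediate.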
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && mask == ~mask2
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && ~mask == mask2
+ // (i.e., copy a bitfield value into another bitfield of the same width)
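+  //
+  // Illustrative example of (1), with assumed constants:
+  //   or (and A, 0xffff00ff), 0x2300
+  // The mask clears bits [15:8] and the value fits entirely in that field, so
+  // this becomes ARMbfi A, 0x23, 0xffff00ff (insert 0x23 into A[15:8]).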
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
+ return SDValue();
+ unsigned Mask = MaskC->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return SDValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+    // Case (2): or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask2 = N11C->getZExtValue();
+
+    // Mask and ~Mask2 (or vice versa) must be equivalent for the BFI pattern
+    // to match as-is.
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned amt = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(amt, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
+ }
+
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
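+/// For example, (bfi A, (and B, 0xff), 0xffffff00) only inserts the low 8
+/// bits of B, so the AND with 0xff is redundant and can be dropped.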
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & (~Mask2)) == 0)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
+ return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue StVal = St->getValue();
+ if (!ISD::isNormalStore(St) || St->isVolatile())
+ return SDValue();
+
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 load inserted into a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
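+  // For example, with two v2i32 inputs widened to v4i32, an original mask of
+  // <0, 1, 4, 5> (element 4 being the start of the second concat) is rewritten
+  // to <0, 1, 2, 3> over the single concat(v1, v2).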
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
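+/// For example, a vld1 whose address is also incremented by the access size
+/// (add r0, r0, #8 for a 64-bit vld1) can instead use the post-incrementing
+/// form vld1.32 {d16}, [r0]! (registers here are illustrative only).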
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+      // separate instructions, which makes it harder to use a non-constant
+      // update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op = N->getOperand(0);
+
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ EVT VT = N->getValueType(0);
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value is negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (! getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ // Vector shifts: check for immediate versions and lower them.
+ // Note: This is done during DAG combining instead of DAG legalizing because
+ // the build_vectors for 64-bit vector element shift counts are generally
+ // not legal, and it is hard to see their values after they get legalized to
+ // loads from a constant pool.
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ case Intrinsic::arm_neon_vqshiftsu:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+ VShiftOpc = ARMISD::VSHL;
+ break;
+ }
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ break;
+ }
+ return SDValue();
+
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vshll intrinsic");
+
+ case Intrinsic::arm_neon_vrshifts:
+ case Intrinsic::arm_neon_vrshiftu:
+ if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ return SDValue();
+
+ case Intrinsic::arm_neon_vqshiftsu:
+ if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+ case Intrinsic::arm_neon_vshiftn:
+ case Intrinsic::arm_neon_vrshiftn:
+ case Intrinsic::arm_neon_vqshiftns:
+ case Intrinsic::arm_neon_vqshiftnu:
+ case Intrinsic::arm_neon_vqshiftnsu:
+ case Intrinsic::arm_neon_vqrshiftns:
+ case Intrinsic::arm_neon_vqrshiftnu:
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ // Narrowing shifts require an immediate right shift.
+ if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+ break;
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
+
+ default:
+ llvm_unreachable("unhandled vector shift");
+ }
+
+ switch (IntNo) {
+ case Intrinsic::arm_neon_vshifts:
+ case Intrinsic::arm_neon_vshiftu:
+ // Opcode already set above.
+ break;
+ case Intrinsic::arm_neon_vshiftls:
+ case Intrinsic::arm_neon_vshiftlu:
+ if (Cnt == VT.getVectorElementType().getSizeInBits())
+ VShiftOpc = ARMISD::VSHLLi;
+ else
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+ ARMISD::VSHLLs : ARMISD::VSHLLu);
+ break;
+ case Intrinsic::arm_neon_vshiftn:
+ VShiftOpc = ARMISD::VSHRN; break;
+ case Intrinsic::arm_neon_vrshifts:
+ VShiftOpc = ARMISD::VRSHRs; break;
+ case Intrinsic::arm_neon_vrshiftu:
+ VShiftOpc = ARMISD::VRSHRu; break;
+ case Intrinsic::arm_neon_vrshiftn:
+ VShiftOpc = ARMISD::VRSHRN; break;
+ case Intrinsic::arm_neon_vqshifts:
+ VShiftOpc = ARMISD::VQSHLs; break;
+ case Intrinsic::arm_neon_vqshiftu:
+ VShiftOpc = ARMISD::VQSHLu; break;
+ case Intrinsic::arm_neon_vqshiftsu:
+ VShiftOpc = ARMISD::VQSHLsu; break;
+ case Intrinsic::arm_neon_vqshiftns:
+ VShiftOpc = ARMISD::VQSHRNs; break;
+ case Intrinsic::arm_neon_vqshiftnu:
+ VShiftOpc = ARMISD::VQSHRNu; break;
+ case Intrinsic::arm_neon_vqshiftnsu:
+ VShiftOpc = ARMISD::VQSHRNsu; break;
+ case Intrinsic::arm_neon_vqrshiftns:
+ VShiftOpc = ARMISD::VQRSHRNs; break;
+ case Intrinsic::arm_neon_vqrshiftnu:
+ VShiftOpc = ARMISD::VQRSHRNu; break;
+ case Intrinsic::arm_neon_vqrshiftnsu:
+ VShiftOpc = ARMISD::VQRSHRNsu; break;
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vshiftins: {
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ unsigned VShiftOpc = 0;
+
+ if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+ VShiftOpc = ARMISD::VSLI;
+ else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+ VShiftOpc = ARMISD::VSRI;
+ else {
+ llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+ }
+
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ case Intrinsic::arm_neon_vqrshifts:
+ case Intrinsic::arm_neon_vqrshiftu:
+ // No immediate versions of these to check for.
+ break;
+ }
+
+ return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them. As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+ ARMISD::VSHRs : ARMISD::VSHRu);
+ return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+ }
+ return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue N0 = N->getOperand(0);
+
+ // Check for sign- and zero-extensions of vector extract operations of 8-
+ // and 16-bit vector elements. NEON supports these directly. They are
+ // handled during DAG combining because type legalization will promote them
+ // to 32-bit types and it is messy to recognize the operations after that.
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Lane = N0.getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = N0.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (VT == MVT::i32 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ISD::SIGN_EXTEND:
+ Opc = ARMISD::VGETLANEs;
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Opc = ARMISD::VGETLANEu;
+ break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ // FIXME: Turn this into a target-neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
+ }
+ return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->allowsUnalignedMem())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return true;
+ // FIXME: VLD1 etc with standard alignment is legal.
+ }
+}
+
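+// isLegalT1AddressImmediate - Thumb1 load / store offsets are unsigned,
+// scaled by the access size, and limited to 5 bits; e.g. i32 accesses accept
+// multiples of 4 up to 124. (Descriptive note summarizing the checks below.)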
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+}
+
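+// isLegalT2AddressImmediate - Thumb2 integer loads / stores accept +imm12 or
+// -imm8 offsets, while VFP loads / stores take word-aligned 8-bit offsets
+// (up to +-1020). (Descriptive note summarizing the cases below.)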
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ bool isNeg = false;
+ if (V < 0) {
+ isNeg = true;
+ V = - V;
+ }
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // + imm12 or - imm8
+ if (isNeg)
+ return V == (V & ((1LL << 8) - 1));
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ // Same as ARM mode. FIXME: NEON?
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
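+/// For example, in ARM mode an i32 offset of 4092 fits the +- imm12 case
+/// below, while 4096 does not (illustrative values derived from the checks).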
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb1Only())
+ return isLegalT1AddressImmediate(V, VT);
+ else if (Subtarget->isThumb2())
+ return isLegalT2AddressImmediate(V, VT, Subtarget);
+
+ // ARM mode.
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ int Scale = AM.Scale;
+ if (Scale < 0)
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ Scale = Scale & ~1;
+ return Scale == 2 || Scale == 4 || Scale == 8;
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ EVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb1Only())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb2())
+ return isLegalT2ScaledAddressingMode(AM, VT);
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ case MVT::i64:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (Scale & 1) return false;
+ return isPowerOf2_32(Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
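+/// For example, 256 is legal for ARM and Thumb2 (a rotated / shifted 8-bit
+/// immediate) but not for Thumb1, which only accepts 0-255 (illustrative).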
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
+/// isLegalAddImmediate - Return true if the specified immediate is legal
+/// add immediate, that is the target has add instructions which can add
+/// a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+}
+
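+// getARMIndexedAddressParts - Match an ADD / SUB pointer for pre/post-indexed
+// forms. AddrMode3 (i16 and sign-extending i8/i1) has an 8-bit immediate
+// offset and AddrMode2 a 12-bit one, hence the -256 / -0x1000 bounds below.
+// (Descriptive note added for clarity.)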
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
+ return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+ assert(Ptr->getOpcode() == ISD::ADD);
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+ isInc = Ptr->getOpcode() == ISD::ADD;
+ Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if the node's address can be
+/// legally represented as a pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+ Offset, isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+/// getPostIndexedAddressParts - returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if this node can be combined
+/// with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = false;
+ if (Subtarget->isThumb2())
+ isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ else
+ isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (!isLegal)
+ return false;
+
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l': // Low regs or general regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::tGPRRegisterClass);
+ else
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::hGPRRegisterClass);
+ break;
+ case 'r':
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPRRegisterClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPR_8RegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPR_8RegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPR_8RegisterClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
+ break;
+ }
+ }
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'j':
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
+ case 'I':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between 0 and 255, for ADD
+ // immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getT2SOImmVal(CVal) != -1)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb1Only()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb1Only()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal <= 7)
+ break;
+ } else if (Subtarget->isThumb2()) {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) { // FIXME thumb2
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The ARM target isn't yet aware of offsets.
+ return false;
+}
+
+int ARM::getVFPf32Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
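+ // For example, 1.0f (0x3f800000) has Sign = 0, Exp = 0 and a zero mantissa;
+ // it passes both checks below and encodes as 0x70. (Worked example added
+ // for illustration.)
+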
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+int ARM::getVFPf64Imm(const APFloat &FPImm) {
+ APInt Imm = FPImm.bitcastToAPInt();
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffLL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return false;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
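+ // (e.g. 0xff0000ff qualifies, while 0x00ffff00 does not; illustrative
+ // values added for clarity)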
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return false;
+ }
+ return true;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget->hasVFP3())
+ return false;
+ if (VT == MVT::f32)
+ return ARM::getVFPf32Imm(Imm) != -1;
+ if (VT == MVT::f64)
+ return ARM::getVFPf64Imm(Imm) != -1;
+ return false;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
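+ // (For instance, a vld3 returning three <4 x i32> vectors covers 48 bytes
+ // and is modeled as v6i64 here; illustrative note.)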
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 000000000000..980fb404887e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,509 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPZ, // ARM compare that sets only Z flag.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+
+ BCC_i64,
+
+ RBIT, // ARM bitreverse instruction
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
+
+ TC_RETURN, // Tail call return pseudo.
+
+ THREAD_POINTER,
+
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
+
+ VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
+ VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
+ VCGEU, // Vector compare unsigned greater than or equal.
+ VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
+ VCGTU, // Vector compare unsigned greater than.
+ VTST, // Vector test bits.
+
+ // Vector shift by immediate:
+ VSHL, // ...left
+ VSHRs, // ...right (signed)
+ VSHRu, // ...right (unsigned)
+ VSHLLs, // ...left long (signed)
+ VSHLLu, // ...left long (unsigned)
+ VSHLLi, // ...left long (with maximum shift count)
+ VSHRN, // ...right narrow
+
+ // Vector rounding shift by immediate:
+ VRSHRs, // ...right (signed)
+ VRSHRu, // ...right (unsigned)
+ VRSHRN, // ...right narrow
+
+ // Vector saturating shift by immediate:
+ VQSHLs, // ...left (signed)
+ VQSHLu, // ...left (unsigned)
+ VQSHLsu, // ...left (signed to unsigned)
+ VQSHRNs, // ...right narrow (signed)
+ VQSHRNu, // ...right narrow (unsigned)
+ VQSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector saturating rounding shift by immediate:
+ VQRSHRNs, // ...right narrow (signed)
+ VQRSHRNu, // ...right narrow (unsigned)
+ VQRSHRNsu, // ...right narrow (signed to unsigned)
+
+ // Vector shift and insert:
+ VSLI, // ...left
+ VSRI, // ...right
+
+ // Vector get lane (VMOV scalar to ARM core register)
+ // (These are used for 8- and 16-bit element types only.)
+ VGETLANEu, // zero-extend vector extract element
+ VGETLANEs, // sign-extend vector extract element
+
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
+ // Vector duplicate:
+ VDUP,
+ VDUPLANE,
+
+ // Vector shuffles:
+ VEXT, // extract
+ VREV64, // reverse elements within 64-bit doublewords
+ VREV32, // reverse elements within 32-bit words
+ VREV16, // reverse elements within 16-bit halfwords
+ VZIP, // zip (interleave)
+ VUZP, // unzip (deinterleave)
+ VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN,
+
+ // Bit-field insert
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector bitwise select
+ VBSL,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace ARM {
+ /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
+ /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
+ /// instruction, returns its 8-bit integer representation. Otherwise,
+ /// returns -1.
+ int getVFPf32Imm(const APFloat &FPImm);
+ int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding(void) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// unaligned memory accesses of the specified type.
+ /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// getPreIndexedAddressParts - returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if the node's address can be
+ /// legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if this node can be combined
+ /// with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ const ARMSubtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ const TargetRegisterInfo *RegInfo;
+
+ const InstrItineraryData *Itins;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(EVT VT);
+ void addQRTypeForNEON(EVT VT);
+
+ typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+ void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ const;
+
+ void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+ MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const;
+
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 000000000000..3ccf22f80b7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,1930 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def LdStExFrm : Format<11>;
+
+def ArithMiscFrm : Format<12>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
+
+// Misc flags.
+
+// By default, an instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have an Rn operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+ bits<3> Value = val;
+}
+def GenericDomain : Domain<0>;
+def VFPDomain : Domain<1>; // Instructions in VFP domain only
+def NeonDomain : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
+def CCOutOperand : AsmOperandClass {
+ let Name = "CCOut";
+ let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
+ let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
+}
+
+// ARM special operands for disassembly only.
+//
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+}
+
+def msr_mask : Operand<i32> {
+ let PrintMethod = "printMSRMaskOperand";
+ let ParserMatchClass = MSRMaskOperand;
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
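+// For example, with 8-bit elements a right shift by 3 is encoded as
+// imm6 = 0b001101 (8 - 3 = 5 in imm6<2:0>). (Illustrative reading of the
+// table above.)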
+def shr_imm8 : Operand<i32> {
+ let EncoderMethod = "getShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+ let EncoderMethod = "getShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+ let EncoderMethod = "getShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+ let EncoderMethod = "getShiftRight64Imm";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+class InstTemplate<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : Instruction {
+ let Namespace = "ARM";
+
+ AddrMode AM = am;
+ int Size = sz;
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+ Format F = f;
+ bits<6> Form = F.Value;
+ Domain D = d;
+ bit isUnaryDataProc = 0;
+ bit canXformTo16Bit = 0;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
+
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+class Encoding {
+ field bits<32> Inst;
+}
+
+class InstARM<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits later
+// on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, int sz, IndexMode im,
+ Format f, Domain d, string cstr, InstrItinClass itin>
+ : InstTemplate<am, sz, im, f, d, cstr, itin>;
+
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
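+
+// For illustration: "sub r0, r1, r2" leaves the optional cc_out operand as the
+// zero register (s = 0, flags untouched), while "subs r0, r1, r2" binds it to
+// CPSR, setting s (Inst{20}) so the instruction updates the condition flags.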
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
+
+// Control-flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
+ asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+}
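+// For illustration: a post-indexed store using AI2stridx above, such as
+// "str r0, [r1], #4", takes the immediate form of am2offset, so
+// offset{13} = 0 (imm12 form, Inst{25} = 0), offset{12} = 1 (add,
+// Inst{23} = 1) and offset{11-0} = 4.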
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
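+// For illustration: an addrmode3 load like "ldrsh r0, [r1, #6]" takes the
+// imm8 form, so addr{13} = 1 (Inst{22} = 1), addr{8} = 1 for add
+// (Inst{23} = 1), addr{12-9} = 1 (r1), and the offset is split across
+// Inst{11-8} = 0 and Inst{3-0} = 6.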
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDRSHT.
+class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
+ opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
+}
+
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM3 store w/ two operands: (GPR, am3offset)
+ bits<14> offset;
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{27-25} = 0b000;
+ let Inst{23} = offset{8};
+ let Inst{22} = offset{9};
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
+ opc, asm, cstr,pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{15-12} = Rt; // Rt
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{23} = addr{8}; // U bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0; // S bit
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiply w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
+ opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<8> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5T];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
+ pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
+ Encoding {
+ let Inst{31-27} = opcod1;
+ let Inst{15-14} = opcod2;
+ let Inst{12} = opcod3;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
+ asm, cstr, pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = !con(oops, (outs s_cc_out:$s));
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
+ "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
+
+class Encoding16 : Encoding {
+ let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+ let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+ let Inst{15-14} = 0b00;
+ let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010000;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+ let Inst{15-12} = opA;
+ let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+class T1BranchCond<bits<4> opcode> : Encoding16 {
+ let Inst{15-12} = opcode;
+}
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
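+// For illustration: a Thumb1 word load such as "ldr r2, [r1, #8]" uses
+// opA = 0b0110 (immediate, 4 bytes); r2 lands in Inst{2-0}, r1 in Inst{5-3},
+// and the byte offset is scaled by the access size, so #8 is encoded as
+// imm5 = 2 in Inst{10-6}.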
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+ let Inst{15-12} = 0b1011;
+ let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = addr{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
+
+// T2Iidxldst - Thumb2 indexed load / store instructions.
+class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
+
+ bits<9> addr;
+ let Inst{7-0} = addr{7-0};
+ let Inst{9} = addr{8}; // Sign bit
+
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{19-16} = Rn{3-0};
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode.
+class T2v6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasV6T2];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Special cases
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
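+
+// For illustration: single-precision register numbers are split across the
+// encoding, e.g. Sd = S7 (0b00111) is emitted with Sd{4-1} = 0b0011 in
+// Inst{15-12} and Sd{0} = 1 in Inst{22}, whereas ADI5 above places Dd{3-0}
+// in Inst{15-12} and Dd{4} in Inst{22}.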
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
+}
+
+// Load / store multiple
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+}
+
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// Double precision, unary
+class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Double precision, binary
+class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision, unary
+class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
+class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+// Single precision, binary
+class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
+class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = opcod4;
+ let Inst{6} = 1;
+ let Inst{4} = 0;
+}
+
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+}
+
+// VFP conversion instructions, if no NEON
+class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, itin, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, itin, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM NEON Instruction templates.
+//
+
+class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-24} = 0b11110100;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+}
+
+// NEON "one register and a modified immediate" format.
+class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
+ bit op5, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{23} = op23;
+ let Inst{21-19} = op21_19;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
+}
+
+// NEON 2 vector register format.
+class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
+ let Inst{24-23} = op24_23;
+ let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON 2 vector register with immediate.
+class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
+}
+
+// NEON 3 vector register format.
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
+
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
+ let Inst{24} = op24;
+ let Inst{23} = op23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// NEON VMOVs between scalar and core registers.
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
+ "", itin> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
+}
+class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
+ opc, dt, asm, pattern>;
+class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, list<dag> pattern>
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
+ opc, dt, asm, pattern>;
+
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<4> lane;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
+// for single-precision FP.
+class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasNEON,UseNEONForFP];
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 000000000000..adcbf1806fe3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,60 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDRi12;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRBi12;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STRi12;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRBi12;
+ }
+
+ return 0;
+}
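+
+// Illustrative (hypothetical) caller: strip the auto-increment form before
+// rewriting a load/store, bailing out when no unindexed equivalent exists.
+// Here TII and MI stand for an ARMInstrInfo pointer and a MachineInstr in
+// some surrounding pass:
+//
+//   unsigned NewOpc = TII->getUnindexedOpcode(MI->getOpcode());
+//   if (NewOpc == 0)
+//     return false;   // no plain load/store counterpart for this opcode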
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 000000000000..f2c7bdc31be9
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,44 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+ ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 000000000000..a42dd1a54ec7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,4076 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInGlue]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
+ [SDNPHasChain]>;
+
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutGlue]>;
+
+def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+ [SDNPOutGlue, SDNPCommutative]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate<"FeatureT2XtPk">;
+def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
+ AssemblerPredicate<"FeatureDSPThumb2">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+ // Returns true if all low 16-bits are 0.
+ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
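+// For example, the constant 0xABCD0000 has all low 16 bits clear, so it
+// matches lo16AllZero, and the hi16 transform then yields the 16-bit value
+// 0xABCD.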
+
+/// imm0_65535 - An immediate in the range [0,65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Branch target for ARM. Handles conditional/unconditional branches.
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Call target for ARM. Handles conditional/unconditional calls.
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+
+// A list of registers separated by commas. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+ let Name = "DPRRegList";
+ let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+ let Name = "SPRRegList";
+ let SuperClasses = [];
+}
+
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, ImmLeaf<i32, [{
+ int32_t v = (int32_t)Imm;
+ return v == 8 || v == 16 || v == 24; }]> {
+ let EncoderMethod = "getRotImmOpValue";
+}
+
+def ShifterAsmOperand : AsmOperandClass {
+ let Name = "Shifter";
+ let SuperClasses = [];
+}
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterAsmOperand;
+}
+
+def ShiftedRegAsmOperand : AsmOperandClass {
+ let Name = "ShiftedReg";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
+}
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits.
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
+ let EncoderMethod = "getSOImmOpValue";
+}
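+// For example, 0x000000FF, 0x0000FF00 and 0xFF000000 are all valid so_imm
+// values (0xFF rotated right by 0, 24 and 8 bits respectively), whereas
+// 0x000001FE is not, because it would need an odd rotation amount.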
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
+
+/// arm_i32imm - True for +V6T2, otherwise true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+}]>;
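+// For example, even without V6T2 the constant 0x00FF00FF is accepted here,
+// since it splits into the two so_imm pieces 0x000000FF and 0x00FF0000.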
+
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]>;
+
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let EncoderMethod = "getImmMinusOneOpValue";
+}
+
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+}
+
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
+
+/// lsb_pos_imm - position of the least significant bit, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
+ return isInt<5>(Imm);
+}]>;
+
+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}] > {
+ let EncoderMethod = "getMsbOpValue";
+}
+
+def ssat_imm : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}]> {
+ let EncoderMethod = "getSsatBitPosValue";
+}
+
+// Define ARM specific addressing modes.
+
+def MemMode2AsmOperand : AsmOperandClass {
+ let Name = "MemMode2";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode2Operand";
+}
+
+def MemMode3AsmOperand : AsmOperandClass {
+ let Name = "MemMode3";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode3Operand";
+}
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
+//
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
+ let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = MemMode2AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
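+// In assembly this covers forms such as [r1, #-8] and [r1, r2, lsl #2].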
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
+ let PrintMethod = "printAddrMode3Operand";
+ let ParserMatchClass = MemMode3AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ldstm_mode := {ia, ib, da, db}
+//
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+ let Name = "MemMode5";
+ let SuperClasses = [];
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+ let ParserMatchClass = MemMode5AsmOperand;
+ let EncoderMethod = "getAddrMode5OpValue";
+}
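+// The offset is encoded as an 8-bit word count, so the reachable byte offsets
+// are multiples of 4 in the range [-1020, +1020] (e.g. [r0, #-1020]).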
+
+// addrmode6 := reg with optional alignment
+//
+def addrmode6 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+}
+
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printAddrMode6OffsetOperand";
+ let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
+// (single element from one lane) for size 32.
+def addrmode6oneL32 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def MemMode7AsmOperand : AsmOperandClass {
+ let Name = "MemMode7";
+ let SuperClasses = [];
+}
+
+// addrmode7 := reg
+// Used by load/store exclusive instructions. Needed to enable correct assembly
+// parsing and printing. Not used for any codegen matching.
+//
+def addrmode7 : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops GPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
+
+def nohash_imm : Operand<i32> {
+ let PrintMethod = "printNoHashImmediate";
+}
+
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
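+// A typical instantiation later in this file looks roughly like:
+//   defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+//                           BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+// which expands to ANDri, ANDrr and ANDrs plus the destination-optional
+// assembly aliases defined above.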
+
+/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
+ }
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = shift;
+ }
+}
+}
+
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{3-0} = Rm;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
+ }
+}
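+// The extension instructions (e.g. sxtb/uxtb) are built from this multiclass;
+// the rotated form corresponds to assembly such as "sxtb r0, r1, ror #16".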
+
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = 0b00;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode GPR:$Rn,
+ (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+}
+
+// For disassembly only.
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc, bit Commutable = 0> {
+ let Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+}
+
+// Carry setting variants
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ 4, IIC_iALUr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
+ def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
+}
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that it corresponds to, and the
+/// third is the size in bytes of this constant pool entry.
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), NoItinerary, []>;
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000001;
+}
+
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000010;
+}
+
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000011;
+}
+
+def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
+ "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+}
+
+def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-0} = 0b00000100;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser currently has no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16} = 0;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
+// Preload signals the memory system about possible future data/instruction accesses.
+// These are for disassembly only.
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
+
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
+
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
+}
+
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ []>, Requires<[IsARM, HasV7]> {
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
+}
+
+// A5.4 Permanently UNDEFINED instructions.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM]> {
+ let Inst = 0xe7ffdefe;
+}
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ 4, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ 4, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
+
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ bits<4> Rd;
+ bits<12> label;
+ let Inst{27-25} = 0b001;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label;
+}
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{27-0} = 0b0001001011111111111100011110;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{27-0} = 0b0001101000001111000000001110;
+ }
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV4T]> {
+ bits<4> dst;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = dst;
+ }
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
+ [(ARMcall_pred tglobaladdr:$func)]>,
+ Requires<[IsARM, IsNotDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx\t$func",
+ [(ARMcall GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
+ }
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]>;
+
+ // ARMv4
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def BLr9_pred : ARMPseudoExpand<(outs),
+ (ins bl_target:$func, pred:$p, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred tglobaladdr:$func)],
+ (BL_pred bl_target:$func, pred:$p)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ // ARMv5T and above
+ def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall GPR:$func)],
+ (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred GPR:$func)],
+ (BLX_pred GPR:$func, pred:$p)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ // ARMv4T
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]>;
+
+ // ARMv4
+ def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ }
+
+ let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
+ let isPredicable = 1 in
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+}
+
+// BLX (immediate) -- for disassembly only
+def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV5T]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+}
+
+// Branch and Exchange Jazelle
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* pattern left blank */]> {
+ bits<4> func;
+ let Inst{23-20} = 0b0010;
+ let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsNotDarwin]>;
+ }
+}
+
+
+
+
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
+}
+
+// Supervisor Call (Software Interrupt) -- for disassembly only
+let isCall = 1, Uses = [SP] in {
+def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
+}
+
+// Store Return State is a system instruction -- for disassembly only
+let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b110; // W = 1
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
+}
+
+def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b100; // W = 0
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b011; // W = 1
+ let Inst{15-0} = 0x0a00;
+}
+
+def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b001; // W = 0
+ let Inst{15-0} = 0x0a00;
+}
+} // isCodeGenOnly = 1
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
+
+// Loads with zero extension
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
+ []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+ }
+ def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am2offset:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ }
+ def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
+ "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
+} // hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
+def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+}
+
+// Store
+
+// Stores with truncate
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
+ "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
+
+// Indexed stores
+def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+
+def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePre, StMiscFrm, IIC_iStore_ru,
+ "strh", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+
+def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ GPR:$Rn, am3offset:$offset))]>;
+
+// For disassembly only
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base, $offset]!",
+ "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$src1, $src2, [$base], $offset",
+ "$base = $base_wb", []>;
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// STRT, STRBT, and STRHT are for disassembly only.
+
+def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+}
+
+def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+}
+
+def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. The suffix-less form is the canonical spelling.
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// FIXME: remove when we have a way of marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+}
+
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
+ DPSoRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = src;
+ let Inst{25} = 0;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
+ Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set GPR:$Rd,
+ (or (and GPR:$src, 0xffff),
+ lo16AllZero:$imm))]>, UnaryDP,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
+def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
+ Requires<[IsARM, HasV6T2]>;
+
+let Uses = [CPSR] in
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_exta_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_exta_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// For disassembly only
+defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+
+// For disassembly only
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
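+// As a concrete illustration of the FIXME above (values chosen here purely
+// for exposition): with $Src = 0xAA000000, (UXTB16r_rot GPR:$Src, 24) rotates
+// the top byte around into the low byte and yields 0x000000AA, whereas
+// (and (shl GPR:$Src, (i32 8)), 0xFF00FF) discards that byte and yields 0.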
+def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general: the add is two 16-bit units, not a 32-bit add.
+// For disassembly only
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+
+
+def SBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111101;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111111;
+ let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+ "ADC", 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+ "SBC">;
+
+// ADC and SBC with 's' bit set.
+let usesCustomInserter = 1 in {
+defm ADCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+}
+
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+// RSB with 's' bit set.
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
+def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ 4, IIC_iALUr,
+ [/* For disassembly only; pattern left blank */]>;
+def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
+}
+
+let Uses = [CPSR] in {
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SBCrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+}
+
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1, Uses = [CPSR] in {
+def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
+def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ 4, IIC_iALUsr,
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
+}
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
+ (SBCSri GPR:$src, so_imm_not:$imm)>;
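+// As a worked instance of the above (illustrative values): (add GPR:$src, -2)
+// is selected as (SUBri GPR:$src, 2), and (adde GPR:$src, 0xFFFFFFFD) as
+// (SBCri GPR:$src, 2), since ~2 = 0xFFFFFFFD and SBC computes
+// Rn + NOT(imm) + carry.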
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that an xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
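+// For instance (n = 3): (mul X, 9) becomes add Rd, X, X, lsl #3, i.e.
+// X + (X << 3), and (mul X, 7) becomes rsb Rd, X, X, lsl #3, i.e. (X << 3) - X.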
+
+// ARM Arithmetic Instruction -- for disassembly only
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = op27_20;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Signed/Unsigned saturate -- for disassembly only
+
+def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{3-0} = Rn;
+}
+
+def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101010;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
+ let Inst{27-21} = 0b0110111;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
+ let Inst{27-20} = 0b01101110;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
+}
+
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+ lsb_pos_imm:$lsb, width_imm:$width),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+ []>, Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{20-16} = width; // Custom encoder => lsb+width-1
+ let Inst{3-0} = Rn;
+}
+
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+ IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
+let isCommutable = 1 in {
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
+ (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+ (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+}
+}
+
+// Multiply + accumulate
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdLo;
+ let Inst{15-12} = RdHi;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, IIC_iMAC64, [],
+ (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+ Requires<[IsARM, NoV6]>;
+}
+
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld -- for disassembly only
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{4} = 1;
+ let Inst{5} = swap;
+ let Inst{6} = sub;
+ let Inst{7} = 0;
+ let Inst{21-20} = 0b00;
+ let Inst{22} = long;
+ let Inst{27-23} = 0b01110;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+ def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+let AddedComplexity = 5 in
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm < 32;
+}], lsl_shift_imm>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+ (and (shl GPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+ (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+ (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def asr_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
+}], asr_shift_imm>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+ (and (sra GPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+ (CMPrs GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction for comparisons
+// with 0. One would expect these two pieces of code to give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short, the
+// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
+// value of r0 and the carry bit (because the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case, the carry flag will always be set
+// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
+// never a "carry" when this AddWithCarry is performed (because the "carry bit"
+// parameter to AddWithCarry is defined as 0).
+//
+// When x is 0 and unsigned:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
+//defm CMN : AI1_cmp_irs<0b1011, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+// (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+ (CMNzri GPR:$src, so_imm_neg:$imm)>;
+
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+ 4, IIC_iMOVi,
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+
+let isMoveImm = 1 in
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operation intrinsics
+//
+
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+}
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff05;
+ let Inst{3-0} = opt;
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", []>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff06;
+ let Inst{3-0} = opt;
+}
+
+let usesCustomInserter = 1 in {
+ let Uses = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
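+
+// The pseudos above are expanded by a custom inserter. As a rough sketch
+// (register choice and barrier placement are illustrative, not the exact
+// emitted code), ATOMIC_LOAD_ADD_I32 becomes an ldrex/strex retry loop
+// bracketed by memory barriers:
+//       dmb
+//     1:
+//       ldrex   r0, [r1]        @ load-exclusive the current value
+//       add     r2, r0, r3      @ apply the operation ($incr in r3)
+//       strex   r4, r2, [r1]    @ attempt the store-exclusive
+//       cmp     r4, #0
+//       bne     1b              @ retry if the exclusive store failed
+//       dmb
+// The ldrex/strex instruction definitions follow below.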
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+ def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
+
+// Clear-Exclusive is for disassembly only.
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7 and are for disassembly only.
+let mayLoad = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+ [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+ [/* For disassembly only; pattern left blank */]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions.
+//
+
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{31-28} = 0b1111;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-25} = 0b110;
+}
+
+multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{
+
+ def _OFFSET : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _PRE : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _POST : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _OPTION : ACI<(outs),
+ !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
+ ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def L_OFFSET : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_PRE : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
+ IndexModePre> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_POST : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
+ IndexModePost> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_OPTION : ACI<(outs),
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
+ ops),
+ !strconcat(!strconcat(opc, "l"), cond),
+ "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+}
+
+defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">;
+defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">;
+defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">;
+defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class MovRCopro<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> {
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2), []>;
+
+def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2), []>;
+
+def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
+ imm:$CRm, imm:$opc2),
+ (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+class MovRRCopro<string opc, bit direction,
+ list<dag> pattern = [/* For disassembly only */]>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction,
+ list<dag> pattern = [/* For disassembly only */]>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Inst{15-12} = Rd;
+ let Inst{7-4} = 0b0000;
+}
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions; the encodings are the
+// same, and the assembly parser has no way to distinguish between them. The
+// mask operand holds the special-register (R) bit in bit 4, and bits 3-0
+// contain the mask of the fields to be accessed in the special register.
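+// For example (assuming the usual f/s/x/c field ordering in mask bits 3-0),
+// "msr CPSR_fc, r0" would use mask = 0b01001 (R = 0, f and c set), while
+// "msr SPSR_fsxc, r0" would use mask = 0b11111.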
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
+}
+
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<12> a;
+
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
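+
+// As a sketch of how this is used (the exact sequence is ABI- and
+// relocation-dependent), a __thread variable access lowers to a call to
+// __aeabi_read_tp followed by an add of the variable's offset from the
+// returned thread pointer:
+//     bl      __aeabi_read_tp   @ r0 = thread pointer
+//     add     r0, r0, #offset   @ "#offset" stands in for the TLS offset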
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>;
+
+// Large immediate handling.
+
+// A 32-bit immediate is built from two so_imm pieces or from movw + movt.
+// This is a single pseudo instruction; the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
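+
+// For example, materializing 0x00AB00CD can be done either as two so_imm
+// pieces:
+//     mov     r0, #0x00AB0000
+//     orr     r0, r0, #0x000000CD
+// or, when movt is available, as a movw/movt pair:
+//     movw    r0, #0x00CD
+//     movt    r0, #0x00AB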
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
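+// As a rough sketch (the label "LPC0" and symbol "sym" are placeholders; the
+// exact relocations are chosen when the pseudo is expanded), the PIC form
+// becomes a movw/movt of a pc-relative offset followed by an add of pc:
+//     movw    r0, :lower16:(sym - (LPC0 + 8))
+//     movt    r0, :upper16:(sym - (LPC0 + 8))
+//   LPC0:
+//     add     r0, pc, r0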
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+ Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
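+// In the patterns below, (sra (shl GPR:$x, 16), 16) is the sign-extended
+// bottom halfword of $x and (sra GPR:$x, 16) is its top halfword. SMULxy and
+// SMLAxy multiply the selected halves (B = bottom, T = top); SMULWB/SMLAWB
+// keep the most significant 32 bits of the 48-bit 32x16 product, matching the
+// (sra (mul ...), 16) patterns.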
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
new file mode 100644
index 000000000000..0df62f456343
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -0,0 +1,4937 @@
+//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM NEON instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+
+// Types for vector shift by immediates. The "SHX" version is for long and
+// narrow operations where the source and destination vectors have different
+// types. The "SHINS" version is for shift and insert operations.
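+// For example, NEONvshlls (vshll, e.g. v8i8 -> v8i16) widens and so uses the
+// "SHX" profile, while NEONvsli (vsli, shift left and insert) uses "SHINS".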
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
+def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
+def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
+def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
+def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
+def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
+def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+
+def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
+def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
+def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+
+def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
+def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
+def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
+def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
+def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
+def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+
+def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
+def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
+def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+
+def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
+def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
+
+def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
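+// For example, "vdup.32 q0, d1[0]" produces a v4i32 result from a v2i32
+// source.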
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON operand definitions
+//===----------------------------------------------------------------------===//
+
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
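+
+// For example, after register allocation a VLDMQIA of Q0 from the address in
+// r0 is rewritten as roughly "vldmia r0, {d0, d1}", since Q0 overlaps D0/D1.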
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
+
+// ...with address register writeback:
+class VLD1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+ "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VLD1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+ []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+
+def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
+// ...with address register writeback:
+class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VLD2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, 0b0011, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
+// ...with address register writeback:
+class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
+// ...with address register writeback:
+class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
+// VLD1LN : Vector Load (single element to one lane)
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6oneL32:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []>;
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
+// ...with double-spaced registers:
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+
+// ...with address register writeback:
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
+// ...with double-spaced registers:
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+
+// ...with address register writeback:
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
+ []>;
+
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
+// ...with double-spaced registers:
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+
+// ...with address register writeback:
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+ [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+ let Pattern = [(set QPR:$dst,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32Pseudo addrmode6:$addr)>;
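+// As an illustration, a splatted scalar load such as
+//   (v2f32 (NEONvdup (f32 (load addrmode6dup:$addr))))
+// is selected to VLD1DUPd32 by the pattern above and prints as
+// "vld1.32 {d0[]}, [r0]" (register numbers illustrative).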
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+ IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+ "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+ "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VST1QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
+
+def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VST1D3<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VST1D3WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST1D4WB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+ IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2Q<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8 : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+class VST2QWB<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+ let Rm = 0b1111;
+}
+class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+
+def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
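+// As a sketch, storing a single extracted f32 lane, e.g.
+//   (store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr)
+// selects to VST1LNd32 and prints as "vst1.32 {d0[1]}, [r0]" (register and
+// lane numbers illustrative).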
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
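+// post_truncsti8, post_truncsti16 and post_store are post-indexed store
+// fragments: the updated base address is returned in GPR:$wb, which is why
+// these patterns set $wb and carry the "$Rn.addr = $wb" constraint.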
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+ "$addr.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
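+// Worked example: lane 10 of a v16i8 Q register lives in D sub-register
+// dsub_(10/8) = dsub_1 at lane (10 & 7) = 2, which is exactly what
+// DSubReg_i8_reg and SubReg_i8_lane compute above.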
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+ InstrItinClass itin, string OpcodeStr, string Dt>
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
+ let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
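+// The by-lane (scalar) forms restrict the $Vm operand: with 16-bit elements
+// the scalar may only live in D0-D7 (DPR_8), and with 32-bit elements only in
+// D0-D15 (DPR_VFP2), because part of the Vm field is reused to encode the
+// lane index.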
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
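+// The *IntSh variants print "$Vd, $Vm, $Vn" instead of "$Vd, $Vn, $Vm"; they
+// are intended for the register-shift intrinsics, where the assembly syntax
+// names the shifted vector before the shift-amount vector.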
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$src1),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
+ SDNode MulOp, SDNode ShOp>
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$src1),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
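+// The "$src1 = $Vd" constraint ties the accumulator input to the destination,
+// so these instructions read and write the same vector register.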
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
+// Neon Long 3-argument intrinsic. The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$src1),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+ Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
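+// e.g. "vaddl.s16 q0, d1, d2" (illustrative registers) widens each 16-bit
+// lane of both D operands to 32 bits and produces a Q-register result, which
+// is what the (TyQ (ExtOp (TyD ...))) operands in the pattern above express.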
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ imm:$lane)))))]>;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ imm:$lane)))))]>;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
+ let isCommutable = Commutable;
+}
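+// e.g. "vaddw.s16 q0, q1, d2" (illustrative registers): only the second,
+// D-sized operand is extended before the operation, as in the pattern above.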
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
+
+// Shift by immediate,
+// both double- and quad-register.
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ : N2VImm<op24, op23, op11_8, op7, op6, op4,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
+ (i32 imm:$SIMM))))]>;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ Intrinsic IntOp>
+ : N2VImm<op24, op23, op11_8, op7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+// Q = quarter int (8 bit) elements
+// H = half int (16 bit) elements
+// S = single int (32 bit) elements
+// D = double int (64 bit) elements
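+//
+// For reference: a "defm" instantiation composes its own name with the name
+// of each "def" inside the multiclass, so e.g. "defm VADD : N3V_QHSD<...>"
+// below yields records such as VADDv8i8, VADDv4i16, VADDv2i32 and VADDv1i64
+// for the D-register forms and VADDv16i8, VADDv8i16, VADDv4i32 and VADDv2i64
+// for the Q-register forms; the explicit patterns later in this file
+// (VMULslv8i16, VQDMULHslv4i32, etc.) refer to records named this way.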
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
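+// Note: the f32 variants above reuse the 32-bit integer encoding with the F
+// bit (Inst{10}) forced to 1, and, like all NEON compares, they produce
+// all-ones/all-zeros integer masks, which is why their results are typed as
+// v2i32/v4i32 rather than as floating-point vectors.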
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
+ def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
+ def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
+ def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
+ def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size of 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
+ : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
+ OpcodeStr, Dt, OpNode, Commutable> {
+ def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
+ def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ // 64-bit vector types.
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp, Commutable>;
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp, Commutable>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp, Commutable>;
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp, Commutable>;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp, Commutable> {
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp, Commutable>;
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp, Commutable>;
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp, Commutable>;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
+ def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
+ IntOp, Commutable> {
+ def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
+
+// Neon Wide 3-register vector operations,
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
+
+// Neon Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
+ def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
+ def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
+ def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
+ def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
+}
+
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
+ def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
+ def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
+ def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
+}
+
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
+// Neon 3-argument intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+}
+
+
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
+// Neon Long 3-argument intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
+ def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
+ def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
+ def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
+}
+
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
+
+// Neon Pairwise long 2-register intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon Pairwise long 2-register accumulate intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
+ def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
+ def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
+ def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
+ def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
+}
+
+
+// Neon 2-register vector shift by immediate,
+// with f of N2RegVShLFrm (left shifts) or N2RegVShRFrm (right shifts)
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Accumulate vector operations,
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, SDNode ShOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift-Insert vector operations,
+// with f of N2RegVShLFrm (VSLI) or N2RegVShRFrm (VSRI)
+// element sizes of 8, 16, 32 and 64 bits:
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ // imm6 = xxxxxx
+}
+
+// Neon Shift Long operations,
+// element sizes of 8, 16, 32 bits:
+multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
+
+// Neon Shift Narrow operations,
+// element sizes of 16, 32, 64 bits:
+multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+}
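+
+// A note on the shift-by-immediate multiclasses above: the upper bits of the
+// imm6 field select the element size (imm6 = 001xxx for 8-bit, 01xxxx for
+// 16-bit and 1xxxxx for 32-bit elements; the 64-bit forms instead set op7,
+// the L bit, and use all six bits), while the remaining bits, filled in from
+// the immediate operand ($SIMM), encode the shift amount.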
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+//===----------------------------------------------------------------------===//
+
+// Vector Add Operations.
+
+// VADD : Vector Add (integer and floating-point)
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
+// VADDL : Vector Add Long (Q = D + D)
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
+// VADDW : Vector Add Wide (Q = Q + D)
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
+// VHADD : Vector Halving Add
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
+// VRHADD : Vector Rounding Halving Add
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+// VQADD : Vector Saturating Add
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
+// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
+// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
+
+// Vector Multiply Operations.
+
+// VMUL : Vector Multiply (integer, polynomial and floating-point)
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
+def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (VMULslfq (v4f32 QPR:$src1),
+ (v2f32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
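+
+// The three patterns above (and the analogous ones for VQDMULH, VQRDMULH,
+// VMLA and VMLS below) fold a multiply by a duplicated Q-register lane into
+// the D-register lane form of the instruction: EXTRACT_SUBREG pulls out the
+// D subregister that contains the requested lane, and the DSubReg_* and
+// SubReg_*_lane transforms compute that subregister index and the lane's
+// position within it from the original lane number.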
+
+// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
+defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
+defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q,
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
+ (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
+ (v4i16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
+ (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
+ (v2i32 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+
+// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vqdmull", "s", int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
+ "vqdmull", "s", int_arm_neon_vqdmull>;
+
+// Vector Multiply-Accumulate and Multiply-Subtract Operations.
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (add (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (add (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLAslfq (v4f32 QPR:$src1),
+ (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
+
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlal", "s", int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
+ (v2f32 (EXTRACT_SUBREG QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
+
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
+
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
+ "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturating Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
+
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
+
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$Vd, $Vm, #0", NEONvclez>;
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+ IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
+
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$Vd, $Vm, #0", NEONvcltz>;
+
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+// VTST : Vector Test Bits
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+ IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+
+// Vector Bitwise Operations.
+
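+// vnotd/vnotq express a bitwise NOT as an XOR with an all-ones vector, so
+// that the generic and/or/xor DAG nodes can feed the VBIC, VORN, VMVN, and
+// VBSL patterns below.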
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
+
+
+// VAND : Vector Bitwise AND
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
+
+// VEOR : Vector Bitwise Exclusive OR
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
+
+// VORR : Vector Bitwise OR
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
+
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
+// VBIC : Vector Bitwise Bit Clear (AND NOT)
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+// VORN : Vector Bitwise OR NOT
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+}
+
+// VMVN : Vector Bitwise NOT
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+
+// VBIF : Vector Bitwise Insert if False
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT : Vector Bitwise Insert if True
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
+def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [/* For disassembly only; pattern left blank */]>;
+
+// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
+// for equivalent operations with different register constraints; it just
+// inserts copies.
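+// As a rough sketch of the select semantics (per the ARM architecture
+// definitions, with Vd also acting as an input):
+//   vbsl Vd, Vn, Vm : Vd = (Vn & Vd) | (Vm & ~Vd)   -- Vd is the mask
+//   vbit Vd, Vn, Vm : Vd = (Vn & Vm) | (Vd & ~Vm)   -- insert Vn where Vm is set
+//   vbif Vd, Vn, Vm : Vd = (Vn & ~Vm) | (Vd & Vm)   -- insert Vn where Vm is clear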
+
+// Vector Absolute Differences.
+
+// VABD : Vector Absolute Difference
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "s", int_arm_neon_vabds, 1>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 1>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+
+// VABDL : Vector Absolute Difference Long (Q = | D - D |)
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+
+// VABA : Vector Absolute Difference and Accumulate
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
+
+// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
+
+// Vector Maximum and Minimum.
+
+// VMAX : Vector Maximum
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+
+// VMIN : Vector Minimum
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
+
+// Vector Pairwise Operations.
+
+// VPADD : Vector Pairwise Add
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
+ "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
+
+// VPADDL : Vector Pairwise Add Long
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
+ int_arm_neon_vpaddls>;
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
+ int_arm_neon_vpaddlu>;
+
+// VPADAL : Vector Pairwise Add and Accumulate Long
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
+ int_arm_neon_vpadals>;
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
+ int_arm_neon_vpadalu>;
+
+// VPMAX : Vector Pairwise Maximum
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+
+// VPMIN : Vector Pairwise Minimum
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+
+// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
+
+// VRECPE : Vector Reciprocal Estimate
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAD, "vrecpe", "u32",
+ v2i32, v2i32, int_arm_neon_vrecpe>;
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+ IIC_VUNAQ, "vrecpe", "u32",
+ v4i32, v4i32, int_arm_neon_vrecpe>;
+def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f32",
+ v2f32, v2f32, int_arm_neon_vrecpe>;
+def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f32",
+ v4f32, v4f32, int_arm_neon_vrecpe>;
+
+// VRECPS : Vector Reciprocal Step
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
+
+// VRSQRTE : Vector Reciprocal Square Root Estimate
+def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAD, "vrsqrte", "u32",
+ v2i32, v2i32, int_arm_neon_vrsqrte>;
+def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
+ IIC_VUNAQ, "vrsqrte", "u32",
+ v4i32, v4i32, int_arm_neon_vrsqrte>;
+def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
+
+// VRSQRTS : Vector Reciprocal Square Root Step
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
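+// Note: VRECPE/VRSQRTE only produce an initial estimate. VRECPS computes
+// (2 - Vn*Vm) and VRSQRTS computes (3 - Vn*Vm)/2, the correction factors for
+// the usual Newton-Raphson refinement of 1/x and 1/sqrt(x).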
+
+// Vector Shifts.
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu>;
+
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+ ValueType OpTy, SDNode OpNode>
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
+ let Inst{21-16} = op21_16;
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+ v8i16, v8i8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+ v4i32, v4i16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+ v2i64, v2i32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+ NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+ NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+ NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+ NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
+
+// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
+ NEONvqrshrns>;
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
+ NEONvqrshrnu>;
+
+// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
+ NEONvqrshrnsu>;
+
+// VSRA : Vector Shift Right and Accumulate
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+// VRSRA : Vector Rounding Shift Right and Accumulate
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+
+// VSLI : Vector Shift Left and Insert
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
+// VSRI : Vector Shift Right and Insert
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
+
+// Vector Absolute and Saturating Absolute.
+
+// VABS : Vector Absolute Value
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
+ int_arm_neon_vabs>;
+def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAD, "vabs", "f32",
+ v2f32, v2f32, int_arm_neon_vabs>;
+def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ IIC_VUNAQ, "vabs", "f32",
+ v4f32, v4f32, int_arm_neon_vabs>;
+
+// VQABS : Vector Saturating Absolute Value
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
+ int_arm_neon_vqabs>;
+
+// Vector Negate.
+
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+ int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+ int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+ int_arm_neon_vclz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiD, "vcnt", "8",
+ v8i8, v8i8, int_arm_neon_vcnt>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+ IIC_VCNTiQ, "vcnt", "8",
+ v16i8, v16i8, int_arm_neon_vcnt>;
+
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+let neverHasSideEffects = 1 in {
+// Pseudo vector move instructions for QQ and QQQQ registers. These should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+ NoItinerary, []>;
+
+def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+ NoItinerary, []>;
+} // neverHasSideEffects
+
+// VMOV : Vector Move (Immediate)
+
+let isReMaterializable = 1 in {
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+} // isReMaterializable
+
+// VMOV : Vector Get Lane (move scalar to ARM core register)
+
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
+def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+ (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+ (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+ (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane))>;
+def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane))>;
+def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
+ (SSubReg_f32_reg imm:$src2))>;
+//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
+// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
+ (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+
+
+// VMOV : Vector Set Lane (move ARM core register to scalar)
+
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
+}
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
+
+def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
+ SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+
+//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
+ (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+ dsub_0)>;
+
+// VDUP : Vector Duplicate (from ARM core register to all elements)
+
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+
+// VDUP : Vector Duplicate Lane (from scalar to all elements)
+
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ imm:$lane)))]>;
+
+// Inst{19-16} is partially specified depending on the element size.
+
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+ let Inst{19} = lane{0};
+}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+ (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i8_reg imm:$lane))),
+ (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+ (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+ (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+// VMOVN : Vector Narrowing Move
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
+// VQMOVN : Vector Saturating Narrowing Move
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
+// VMOVL : Vector Lengthening Move
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+// Vector Conversions.
+
+// VCVT : Vector Convert Between Floating-Point and Integers
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v4i32, v4f32, fp_to_sint>;
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v4i32, v4f32, fp_to_uint>;
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v4f32, v4i32, sint_to_fp>;
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v4f32, v4i32, uint_to_fp>;
+
+// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+ v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+ v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+ v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
+
+// VTBL : Vector Table Lookup
+def VTBL1
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+def VTBL3
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+def VTBL4
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ NVTBLFrm, IIC_VTB4,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+def VTBX3
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ NVTBLFrm, IIC_VTBX3,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "$orig = $Vd", []>;
+def VTBX4
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
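+// These patterns place the scalar f32 operands into lane 0 (ssub_0) of
+// otherwise undefined D registers, run the corresponding NEON D-register
+// instruction, and extract lane 0 of the result, so scalar FP math can be
+// done on the NEON unit when that is preferred.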
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 000000000000..bfe83eceb13f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1397 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = imm0_255_asmoperand;
+}
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break immediates up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
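+// For example, 0x00AB0000 is 0xAB shifted left by 16, so it is matched by
+// thumb_immshifted; thumb_immshifted_val then yields 0xAB and
+// thumb_immshifted_shamt yields 16 (assuming the ARM_AM helpers behave as
+// their names suggest).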
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Immediate scaled by 4.
+def t_imm_s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeRegThumb";
+ let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeImmThumb";
+ let SuperClasses = [];
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg (with 1-, 2-, and 4-byte access variants)
+//
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
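+// Illustrative assembly forms for the addressing modes above (not from the
+// original source):
+//   t_addrmode_rr/rrs4 : ldr  r0, [r1, r2]
+//   t_addrmode_is4     : ldr  r0, [r1, #28]   (imm5 * 4)
+//   t_addrmode_is2     : ldrh r0, [r1, #6]    (imm5 * 2)
+//   t_addrmode_is1     : ldrb r0, [r1, #3]
+//   t_addrmode_sp      : ldr  r0, [sp, #16]   (imm8 * 4)
+//   t_addrmode_pc      : ldr  r0, label       (pc-relative, imm8 * 4)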
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def tADJCALLSTACKUP :
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+def tADJCALLSTACKDOWN :
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+// T1Disassembly - A simple class to make encoding some disassembly patterns
+// easier and less verbose.
+class T1Disassembly<bits<2> op1, bits<8> op2>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = op1;
+ let Inst{7-0} = op2;
+}
+
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x00>; // A8.6.110
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x10>; // A8.6.410
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x20>; // A8.6.408
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x30>; // A8.6.409
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b11, 0x40>; // A8.6.157
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
+
+def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 1; // Big-Endian
+ let Inst{2-0} = 0b000;
+}
+
+def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ // A8.6.156
+ let Inst{9-5} = 0b10010;
+ let Inst{4} = 1;
+ let Inst{3} = 0; // Little-Endian
+ let Inst{2-0} = 0b000;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
+}
+
+// For both thumb1 and thumb2.
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
+}
+
+// PC relative add (ADR).
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, pc, $rhs", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ // A6.2 & A8.6.10
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD <Rd>, sp, #<imm8>
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $sp, $rhs", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
+
+// ADD sp, sp, #<imm7>
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "sub\t$dst, $rhs", []>,
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
+
+// ADD <Rm>, sp
+def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T1
+ bits<4> dst;
+ let Inst{7} = dst{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = dst{2-0};
+}
+
+// ADD sp, <Rm>
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ "add\t$dst, $rhs", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.9 Encoding T2
+ bits<4> dst;
+ let Inst{7} = 1;
+ let Inst{6-3} = dst;
+ let Inst{2-0} = 0b101;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>;
+}
+
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
+ Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [SP] in {
+ // Also used for Thumb2
+ def tBL : TIx2<0b11110, 0b11, 1,
+ (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
+ "bl\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi : TIx2<0b11110, 0b11, 0,
+ (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ "blx\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
+}
+
+let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
+ Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ // Also used for Thumb2
+ def tBLr9 : TIx2<0b11110, 0b11, 1,
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ IIC_Br, "bl${p}\t$func",
+ [(ARMtcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
+
+ // ARMv5T and above, also used for Thumb2
+ def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ IIC_Br, "blx${p}\t$func",
+ [(ARMcall tglobaladdr:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
+
+ // Also used for Thumb2
+ def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
+ [(ARMtcall GPR:$func)]>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>,
+ T1Special<{1,1,1,?}> {
+ // A6.2.3 & A8.6.24
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
+
+ // ARMv4T
+ def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
+
+ // Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
+ let Defs = [LR] in
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
+ 4, IIC_Br, [], (tBL t_bltarget:$target)>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ T1BranchCond<{1,1,0,1}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+  def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+                  "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+}
+
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
+ // on Darwin), so it's in ARMInstrThumb2.td.
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsDarwin]>;
+ }
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ }
+}
+
+
+// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
+}
+
+// The assembler uses 0xDEFE for a trap instruction.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
+}
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+// Loads: reg/reg and reg/imm5
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
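+// For illustration: each defm below expands to a reg/reg and a reg/imm5
+// variant, e.g. tLDR becomes tLDRr and tLDRi, and tLDRB becomes tLDRBr and
+// tLDRBi; the non-instruction patterns at the end of this file refer to those
+// expanded names directly.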
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+
+let AddedComplexity = 10 in
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
+ "ldrsb", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+let AddedComplexity = 10 in
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
+ "ldrsh", "\t$dst, $addr",
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// Load tconstpool
+// FIXME: Use ldr.n to work around a Darwin assembler bug.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For disassembly use only.
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [/* disassembly only */]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bits<6> T1Enc,
+ bit L_bit> {
+ def IA :
+ T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+ def IA_UPD :
+ T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+}
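+// For illustration: the defms below expand to an increment-after form and its
+// writeback twin, e.g. tLDM becomes tLDMIA and tLDMIA_UPD.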
+
+// These require the base address either to be written back or to be one of
+// the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
+ {1,1,0,0,1,?}, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
+ {1,1,0,0,0,?}, 0>;
+
+} // neverHasSideEffects
+
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
+
+let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
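+// Note (general TableGen behaviour, not specific to this file): the bit-field
+// names declared in these helper classes (Rd, Rn, Rm, Rdn, imm8, ...) are
+// matched by name against the operands in the (outs)/(ins) dags of each
+// instruction, e.g. the bits<3> Rm field of T1sIGenEncode is populated from
+// the $Rm operand of each instruction that uses the class.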
+
+// Add with carry register
+let isCommutable = 1, Uses = [CPSR] in
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+
+// Add immediate
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+
+// Add register
+let isCommutable = 1 in
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+
+let neverHasSideEffects = 1 in
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+// AND register
+let isCommutable = 1 in
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+
+// ASR immediate
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// ASR register
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+
+// BIC register
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+
+// CMN register
+let isCompare = 1, Defs = [CPSR] in {
+// FIXME: CMN is disabled because the condition codes come out backwards from
+//        compare expectations. Compare-to-zero still works out, just not the
+//        relational forms.
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
+
+// CMP immediate
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
+}
+
+// CMP register
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
+}
+} // isCompare = 1, Defs = [CPSR]
+
+
+// XOR register
+let isCommutable = 1 in
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSL immediate
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSL register
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+
+// LSR immediate
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
+
+// LSR register
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
+
+// A7-73: MOV(2) - mov setting flag.
+
+let neverHasSideEffects = 1 in {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+let Defs = [CPSR] in
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+} // neverHasSideEffects
+
+// Multiply register
+let isCommutable = 1 in
+def tMUL : // A8.6.105 T1
+ T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMUL32,
+ "mul", "\t$Rdn, $Rm, $Rdn",
+ [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
+let isCommutable = 1 in
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+
+// Subtract with carry register
+let Uses = [CPSR] in
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+
+// Subtract immediate
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
+
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+
+// TODO: A7-96: STMIA - store multiple.
+
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
+// Expanded after instruction selection into a branch sequence.
+let usesCustomInserter = 1 in // Expanded after instruction selection.
+ def tMOVCCr_pseudo :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ NoItinerary,
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 2, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 2, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
+// Add with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
+ (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
+ (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
+ (tADDrr tGPR:$lhs, tGPR:$rhs)>;
+
+// Subtract with carry
+def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
+ (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
+def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
+ (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
+def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
+ (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+
+// ConstantPool, GlobalAddress
+def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
+
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// Indirect calls to ARM routines
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
+
+// extload -> zextload
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+
+// If it's impossible to use the [r,r] address mode for a sextload, select a
+// zero-extending ldr{b|h} followed by sxt{b|h} instead.
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : T1Pat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : T1Pat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
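+// Illustrative expansions of the two patterns above (register name rN chosen
+// arbitrarily):
+//   0x2100     ->  movs rN, #0x84 ; lsls rN, rN, #6
+//   0xffffff00 ->  movs rN, #0xff ; mvns rN, rN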
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary,
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 000000000000..c2c6cbcac0f5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,3467 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// IT block predicate field
+def it_pred : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+}
+
+// IT block condition mask
+def it_mask : Operand<i32> {
+ let PrintMethod = "printThumbITMask";
+}
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
+ let PrintMethod = "printT2SOOperand";
+ let MIOperandInfo = (ops rGPR, i32imm);
+}
+
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+}]>;
+
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
+ let EncoderMethod = "getT2SOImmOpValue";
+}
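+// Illustrative values (not from the original source): 0x000000ab, 0x00ab00ab,
+// 0xab00ab00, 0xabababab and 0xff000000 are all valid t2_so_imm encodings,
+// while 0x00fff000 is not (its significant bits do not fit in a single
+// rotated or splatted 8-bit value).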
+
+// t2_so_imm_not - Match an immediate that is a complement
+// of a t2_so_imm.
+def t2_so_imm_not : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_not_XFORM>;
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
+}], t2_so_imm_neg_XFORM>;
+
+/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
+def imm1_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
+}]>;
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095 : Operand<i32>,
+ ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_255_neg : PatLeaf<(i32 imm), [{
+  return (uint32_t)(-N->getZExtValue()) < 256;
+}], imm_neg_XFORM>;
+
+def imm0_255_not : PatLeaf<(i32 imm), [{
+  return (uint32_t)(~N->getZExtValue()) < 256;
+}], imm_comp_XFORM>;
+
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if all low 5-bits are 1.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+}
+
+
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
+// t2addrmode_imm8 := reg +/- imm8
+def t2addrmode_imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8_offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def t2addrmode_imm8s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+def t2am_imm8s4_offset : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+}
+
+// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+ let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
+}
+
+// t2addrmode_reg := reg
+// Used by load/store exclusive instructions. It exists to enable correct
+// assembly parsing and printing; it is not used for any codegen matching.
+//
+def t2addrmode_reg : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops GPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
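+// Illustrative assembly forms for the Thumb2 addressing modes above (not from
+// the original source):
+//   t2addrmode_imm12  : ldr.w r0, [r1, #4092]
+//   t2addrmode_imm8   : ldr   r0, [r1, #-8]
+//   t2addrmode_imm8s4 : ldrd  r0, r1, [r2, #16]
+//   t2addrmode_so_reg : ldr.w r0, [r1, r2, lsl #2]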
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0,
+ string wide = ""> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+}
+
+/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
+/// the ".w" suffix to indicate that they are wide.
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
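+// Illustrative instantiation (hypothetical; opcode, itineraries and name are
+// chosen for illustration only, not taken from this section):
+//   defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+//                              BinOpFrag<(and node:$LHS, node:$RHS)>,
+//                              "t2AND", 1>;
+// would expand to t2ANDri, t2ANDrr and t2ANDrs, plus the destination-optional
+// assembly aliases defined in T2I_bin_irs above.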
+
+/// T2I_rbin_irs - Same as T2I_bin_irs except the order of the operands is
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
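+
+// For illustration: because the patterns above swap the operands, an
+// instantiation such as
+//
+//   defm t2RSB : T2I_rbin_irs<0b1110, "rsb",
+//                             BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+//
+// (see the Arithmetic Instructions section below) selects "rsb Rd, Rn, #imm"
+// for (sub imm, Rn), i.e. the register is the value being subtracted.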
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2ThreeReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
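+
+// For illustration: this multiclass backs the flag-setting forms used for
+// carry generation, e.g. from the Arithmetic Instructions section below:
+//
+//   defm t2ADDS : T2I_bin_s_irs<0b1000, "add",
+//                               IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+//                               BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+//
+// Being isCodeGenOnly, the resulting t2ADDSri/rr/rs records are never parsed
+// from assembly; they exist so isel can model the CPSR def.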
+
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+ }
+ }
+ // 12-bit imm
+ def ri12 : T2I<
+ (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
+ let Inst{23-21} = op23_21;
+ }
+}
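+
+// For illustration: t2ADD and t2SUB below instantiate this multiclass, e.g.
+//
+//   defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+//                               BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+//
+// which produces t2ADDri (modified-immediate "add.w"), t2ADDri12 (the 12-bit
+// "addw" encoding), t2ADDrr and t2ADDrs.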
+
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and uses the carry
+/// bit. It's not predicable.
+let Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
+ // shifted imm
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ }
+ // register
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2]> {
+ let isCommutable = Commutable;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ }
+}
+}
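+
+// For illustration: adc/sbc consume the carry flag via the Uses = [CPSR]
+// above, e.g. from the Arithmetic Instructions section below:
+//
+//   defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+//                   BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;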
+
+// Carry setting variants
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
+ // register
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ 4, IIC_iALUr,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
+ // shifted register
+ def rs : t2PseudoInst<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit, and the
+/// register version is not needed since this is only for codegen.
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ }
+ // shifted register
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ }
+}
+}
+
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+/// rotate operation that produces a value.
+multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
+ // 5-bit imm
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-21} = 0b010010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod;
+ }
+ // register
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-21} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0000;
+ }
+}
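+
+// For illustration: the shift/rotate instructions are all stamped out from
+// this multiclass, e.g.
+//
+//   defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+//
+// giving t2LSLri (shift by imm1_31) and t2LSLrr (shift by register).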
+
+/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let isCompare = 1, Defs = [CPSR] in {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
+ // shifted imm
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{15} = 0;
+ let Inst{11-8} = 0b1111; // Rd
+ }
+ // register
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
+ opc, ".w\t$lhs, $rhs",
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{11-8} = 0b1111; // Rd
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = 1; // The S bit.
+ let Inst{11-8} = 0b1111; // Rd
+ }
+}
+}
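+
+// For illustration (a sketch; the concrete defm lines fall outside this hunk):
+// the compare/test instructions are expected to instantiate this multiclass
+// roughly as
+//
+//   defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+//                            IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+//                            BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+//
+// The (outs) list is empty and Rd is hard-wired to 0b1111, matching the
+// "flags only, no explicit result" behaviour described above.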
+
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 1;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+
+ // FIXME: Is the pci variant actually needed?
+ def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ let isReMaterializable = 1;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = ?; // add = (U == '1')
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1; // load
+ let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
+ }
+}
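+
+// For illustration: the loads below (t2LDR, t2LDRB, t2LDRH, t2LDRSB, t2LDRSH)
+// are all instances of this multiclass, e.g.
+//
+//   defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+//                        UnOpFrag<(zextloadi8 node:$Src)>>;
+//
+// so each gets i12, i8, s and pci records; that is why the zextload/extload
+// patterns further down refer to t2LDRBi12, t2LDRBi8, t2LDRBs and t2LDRBpci.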
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0001;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
+ }
+ def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11} = 1;
+ // Offset: index==TRUE, wback==FALSE
+ let Inst{10} = 1; // The P bit.
+ let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
+ }
+ def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0000;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0; // !load
+ let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
+ }
+}
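+
+// For illustration: the stores below mirror T2I_ld, e.g.
+//
+//   defm t2STRB : T2I_st<0b00, "strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+//                        BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+//
+// producing t2STRBi12, t2STRBi8 and t2STRBs (there is no pc-relative store).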
+
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
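+
+// For illustration: the sign/zero extenders below use this multiclass, e.g.
+//
+//   defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+//                              UnOpFrag<(sext_inreg node:$Src, i8)>>;
+//
+// giving t2SXTBr and t2SXTBr_rot (the ", ror #8/16/24" form).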
+
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
+ [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier; no pattern
+// is supported yet.
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm, ror $rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn,
+ (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
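+
+// For illustration: the extend-and-add instructions below use this, e.g.
+//
+//   defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+//                   BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+//
+// giving t2SXTABrr and t2SXTABrr_rot.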
+
+// DO variant - disassembly only, no pattern
+
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b10;
+ // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
+ let Inst{22} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ 4, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ 4, IIC_iALUi,
+ []>;
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi8 node:$Src)>>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_imm8s4:$addr),
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_imm8:$addr),
+ (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
+ (t2LDRBpci tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
+ (t2LDRHi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+ (t2LDRHpci tconstpool:$addr)>;
+
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
+// Indexed loads
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+
+def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
+ []>;
+
+def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ (ins t2addrmode_imm8:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
+ []>;
+def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
+ []>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
+// for disassembly only.
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+
+// Store
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
+
+// Indexed stores
+def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
+ [(set GPR:$base_wb,
+ (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
+}
+
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
+
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+
+def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
+
+// T2Ipl (Preload Data/Instruction) signals the memory system about possible
+// future data/instruction accesses. These are for disassembly only.
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
+
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let addr{12} = 1; // add = TRUE
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+
+ def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
+ }
+
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0b000000;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+}
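+
+// For illustration: each instantiation below produces four records, named by
+// appending the suffixes above to the defm name; t2LDM expands to t2LDMIA,
+// t2LDMIA_UPD, t2LDMDB and t2LDMDB_UPD (and likewise for t2STM).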
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in
+def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0000;
+}
+
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsThumb2]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0110;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+ UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
+ UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
+
+// TODO: SXT(A){B|H}16 - done for disassembly only
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+ UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
+ UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm t2ADD : T2I_bin_ii12rs<0b000, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
+ node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
+ node:$RHS)>>;
+
+// RSB
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity prefers the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
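+
+// Worked example of the patterns above (illustration only): the DAG node
+// (add GPR:$src, -5) is the canonical form of "sub $src, #5", so it matches
+// the imm0_255_neg pattern and is selected as (t2SUBri GPR:$src, 5). The
+// *_not patterns work the same way, except the operand transform hands the
+// bitwise complement of the immediate to the sbc instruction.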
+
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// and miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Signed/Unsigned saturate -- for disassembly only
+
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm{4-0};
+ let Inst{21} = sh{6};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+}
+
+def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$dst, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate Instructions.
+//
+
+defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
+let Uses = [CPSR] in {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000;
+ let Inst{7-4} = 0b0011;
+}
+}
+
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b01; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 1; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = 0b10; // Shift type.
+ // Shift amount = Inst{14-12:7-6} = 1.
+ let Inst{14-12} = 0b000;
+ let Inst{7-6} = 0b01;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+
+defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>,
+ "t2BIC">;
+
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+}
+
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10100;
+ let Inst{15} = 0;
+}
+
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b11100;
+ let Inst{15} = 0;
+}
+
+// A8.6.18 BFI - Bitfield insert (Encoding T1)
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+
+ // GNU as only supports this form of bfi (w/ 4 arguments)
+ let isAsmParserOnly = 1 in
+ def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
+ width_imm:$width),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
+ []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
+
+ bits<5> lsbit;
+ bits<5> width;
+ let msb{4-0} = width; // Custom encoder => lsb+width-1
+ let lsb{4-0} = lsbit;
+ }
+}
+
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>,
+ "t2ORN", 0, "">;
+
+// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
+
+
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+
+// FIXME: Disable this pattern on Darwin to work around an assembler bug.
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
+ Requires<[IsThumb2]>;
+
+def : T2Pat<(t2_so_imm_not:$src),
+ (t2MVNi t2_so_imm_not:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+let isCommutable = 1 in
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0000; // Multiply
+}
+
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b000;
+ let Inst{7-4} = 0b0001; // Multiply and Subtract
+}
+
+// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$Rd, rGPR:$Ra),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+} // isCommutable
+
+// Multiply + accumulate
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+} // neverHasSideEffects
+
+// Rounding variants of the below are included for disassembly only
+
+// Most significant word multiply
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
+}
+
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
+multiclass T2I_smul<string opc, PatFrag opnode> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+
+multiclass T2I_smla<string opc, PatFrag opnode> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b10;
+ }
+
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b001;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b11;
+ }
+
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b00;
+ }
+
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b011;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = 0b01;
+ }
+}
+
+defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// These are for disassembly only.
+
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-22} = 0b01010;
+ let Inst{21-20} = op1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
+}
+
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 0; // BT form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = 0b01100;
+ let Inst{5} = 1; // TB form
+ let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; use PKHBT instead.
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
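+
+// Illustrative semantics of the pack instructions above (register numbers and
+// shift amounts are arbitrary examples chosen for illustration):
+//   pkhbt r0, r1, r2, lsl #8   ; r0 = (r1 & 0xffff) | ((r2 << 8) & 0xffff0000)
+//   pkhtb r0, r1, r2, asr #16  ; r0 = (r1 & 0xffff0000) | ((r2 >> 16) & 0xffff)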
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
+ (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
+
+// FIXME: Disable CMN, as CCodes are backwards from compare expectations
+// Compare-to-zero still works out, just not the relationals
+//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
+// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
+// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
+
+defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
+class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{5-4} = opcod; // Shift type.
+}
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// Memory barriers protect the atomic sequences.
+let hasSideEffects = 1 in {
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
+}
+}
+
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
+}
+
+// ISB has only the full-system option -- for disassembly only
+def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-4} = 0xf3bf8f6;
+ let Inst{3-0} = 0b1111;
+}
+
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+ let Inst{3-0} = 0b1111;
+
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern, bits<4> rt2 = 0b1111>
+ : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-8} = rt2;
+ let Inst{7-6} = 0b01;
+ let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000101;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rd;
+ bits<4> addr;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+}
+
+let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
+
+// Clear-Exclusive is for disassembly only.
+def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
+}
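+
+// A sketch of how these pieces are typically combined into an atomic
+// read-modify-write sequence (shown for illustration only; register numbers
+// and the barrier option are arbitrary):
+//   1: ldrex   r1, [r0]        ; load current value and set exclusive monitor
+//      add     r1, r1, #1      ; compute the updated value
+//      strex   r2, r1, [r0]    ; try to store; r2 = 0 on success, 1 on failure
+//      cmp     r2, #0
+//      bne     1b              ; retry if the exclusive access was lost
+//      dmb     sy              ; order the update against later memory accesses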
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, 0, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
+ Requires<[IsThumb2, NoVFP]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b.w\t$target",
+ [(br bb:$target)]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 1;
+
+ bits<20> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{21-16} = target{16-11};
+ let Inst{10-0} = target{10-0};
+}
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+
+// FIXME: Add a non-pc based case that can be predicated.
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ 0, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbb", "\t[$Rn, $Rm]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+}
+
+def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
+}
+} // isNotDuplicable, isIndirectBranch
+
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
+ "b", ".w\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
+}
+
+// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin version.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst)>,
+ Requires<[IsThumb2, IsDarwin]>;
+}
+
+// IT block
+let Defs = [ITSTATE] in
+def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
+ AddrModeNone, 2, IIC_iALUx,
+ "it$mask\t$cc", "", []> {
+ // 16-bit instruction.
+ let Inst{31-16} = 0x0000;
+ let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
+}
+
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+
+ bits<4> func;
+ let Inst{19-16} = func;
+}
+
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser currently has no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op),
+ [/* For disassembly only; pattern left blank */]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
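+
+// Illustrative mapping of assembly forms to the three definitions above
+// (flag and mode values are arbitrary examples):
+//   cpsie.w if, #16  ->  t2CPS3p  (interrupt flags plus a mode change)
+//   cpsid.w i        ->  t2CPS2p  (interrupt flags only)
+//   cps #16          ->  t2CPS1p  (mode change only)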
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+
+ bits<4> opt;
+ let Inst{3-0} = opt;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+
+ bits<4> opt;
+ let Inst{19-16} = opt;
+}
+
+class T2SRS<bits<12> op31_20,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<5> mode;
+ let Inst{4-0} = mode{4-0};
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2SRS<0b111010000010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSDB : T2SRS<0b111010000000,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIAW : T2SRS<0b111010011010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIA : T2SRS<0b111010011000,
+ (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Return From Exception is a system instruction -- for disassembly only
+
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
+}
+
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
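+
+// As an illustration (constant chosen arbitrarily), materializing 0x12345678
+// is expected to expand to a movw/movt pair along the lines of:
+//   movw rD, #0x5678   ; write the low halfword, clearing the high half
+//   movt rD, #0x1234   ; write the high halfword, keeping the low half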
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
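+
+// Illustrative shape of the late expansion (label names and the exact
+// pc-relative bias are target details, sketched here only for orientation):
+//   ldr   rD, LCPIx       ; literal-pool load of "addr - (LPCx + bias)"
+//   LPCx: add rD, pc      ; add the pc back in to form the final address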
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+ let Inst{15-14} = op15_14{1-0};
+ let Inst{12} = op12{0};
+}
+
+class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
+}
+
+def t2MRS : T2MRS<0b111100111110, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]>;
+def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions: the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (the R bit) in bit 4, and bits 3-0
+// contain the mask of the fields to be accessed in the special register.
+def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
+ 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{13} = 0b0;
+ let Inst{11-8} = mask{3-0};
+}
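+
+// Illustrative mask encodings (operands chosen as examples):
+//   msr CPSR_fc, r0    ->  mask{4} = 0 (CPSR), mask{3-0} = 0b1001 (f and c)
+//   msr SPSR_fsxc, r0  ->  mask{4} = 1 (SPSR), mask{3-0} = 0b1111 (all fields)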
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register
+//
+
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern>
+ : T2Cop<Op, oops, iops,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ pattern> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+ list<dag> pattern = []>
+ : T2Cop<Op, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ []>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2), []>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
+ [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+ GPR:$Rt2, imm:$CRm)]>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.
+//
+
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 000000000000..f1f3cb9c2ecd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,1128 @@
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf32ImmOperand";
+}
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
+ }]> {
+ let PrintMethod = "printVFPf64ImmOperand";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
+
+} // neverHasSideEffects
+
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
+
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
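+
+// Rationale, as a worked case: VNMUL negates the product after it has been
+// rounded, while (fneg a) * b rounds the already-negated product. Under a
+// directed rounding mode (for example round toward +infinity) the two results
+// can differ by one ulp, so the fold is only applied when rounding is not
+// sign dependent.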
+
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+let Defs = [FPSCR] in {
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+// Special case encoding: bits 11-8 are 0b1011.
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-6} = 0b11;
+ let Inst{4} = 0;
+}
+
+// Conversions between half precision and single precision. For disassembly only.
+
+// FIXME: Verify encoding after integrated assembler is working.
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def : ARMPat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in {
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
+ [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def VMOVRRS : AVConv3I<0b11000101, 0b1010,
+ (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // neverHasSideEffects
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+let neverHasSideEffects = 1 in
+def VMOVSRR : AVConv5I<0b11000100, 0b1010,
+ (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
+ IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX: SPR system reg -> GPR
+// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
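All of the *_Encode classes here split a VFP register number the same way. As a minimal illustrative sketch (the helper names below are not from this patch), the split works out to:

// Single precision Sn: the low bit becomes the extra D/N/M bit and the upper
// four bits fill the 4-bit register field (e.g. Inst{3-0} = Sm{4-1},
// Inst{5} = Sm{0} above).
static inline void splitSReg(unsigned SRegNum, unsigned &Field, unsigned &Bit) {
  Field = (SRegNum >> 1) & 0xF;
  Bit   = SRegNum & 1;
}

// Double precision Dn: the roles are swapped; the low four bits fill the
// field and bit 4 becomes the extra bit (Inst{3-0} = Dm{3-0}, Inst{5} = Dm{4}).
static inline void splitDReg(unsigned DRegNum, unsigned &Field, unsigned &Bit) {
  Field = DRegNum & 0xF;
  Bit   = (DRegNum >> 4) & 1;
}

For example, S7 (0b00111) yields Field = 0b0011 with Bit = 1, while D7 yields Field = 0b0111 with Bit = 0.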
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ let Inst{7} = 1; // s32
+}
+
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ let Inst{7} = 0; // u32
+}
+
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ let Inst{7} = 0; // Z bit
+}
+
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ let Inst{7} = 0; // Z bit
+}
+}
+
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
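As a rough sketch of what the $fbits operand in the conversions below means (illustrative helpers, not code from this patch; rounding and saturation are left to the hardware and not modeled here):

#include <cstdint>

// A fixed-point value with #fbits fractional bits is the real value scaled by
// 2^fbits; converting back divides by the same factor.
static int32_t floatToFixed(float V, unsigned FBits) {
  return (int32_t)(V * (float)(1u << FBits));  // e.g. vcvt.s32.f32 ..., #8 ~ V * 256
}
static float fixedToFloat(int32_t V, unsigned FBits) {
  return (float)V / (float)(1u << FBits);      // e.g. vcvt.f32.s32 ..., #8 ~ V / 256
}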
+// FIXME: Marking these as codegen only seems wrong. They are real
+// instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+
+// FP to Fixed-Point:
+
+def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+
+//===----------------------------------------------------------------------===//
+// FP Multiply-Accumulate Operations.
+//
+
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+let neverHasSideEffects = 1 in {
+def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+ 4, IIC_fpUNA64,
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+ 4, IIC_fpUNA32,
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Move from VFP System Register to ARM core register.
+//
+
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
+ let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
+}
+
+// APSR is the application level alias of CPSR. This instruction moves the
+// FPSCR N, Z, C, V flags to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
+ let Inst{27-20} = 0b11101110;
+ let Inst{19-16} = opc19_16;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
+
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+// Materialize FP immediates. VFP3 only.
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
+ VFPMiscFrm, IIC_fpUNA64,
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{19} = imm{31};
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision.
+ let Inst{7-4} = 0b0000;
+}
+
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{19} = imm{31}; // The immediate is handled as a double.
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
+ let Inst{27-23} = 0b11101;
+ let Inst{21-20} = 0b11;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision.
+ let Inst{7-4} = 0b0000;
+}
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 000000000000..45b7e48d0cfb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,336 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
+// to preserve more context and manipulate the stack directly). Instead,
+ // we write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback();
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save the caller-saved registers, since they may hold arguments for the
+ // real target function. We have to act as if this whole compilation
+ // callback doesn't exist as far as the caller is concerned, so we can't
+ // just preserve the callee-saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry. Pass it as
+ // the argument to the C part of the callback.
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+ void *PtrAddr = JCE.allocIndirectGV(
+ GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ void *Addr;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16 bytes in size and 4-byte aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ DEBUG(if (F)
+ errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
+ << "] for GV '" << F->getName() << "'\n";
+ else
+ errs() << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n");
+ }
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ } else {
+ // The stub is 8 bytes in size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16 bytes in size and 4-byte aligned.
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub writable");
+ }
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
+ llvm_unreachable("ERROR: Unable to mark stub executable");
+ }
+ }
+
+ return Addr;
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the calculated immed value.
+ // The VFP immediate offset is encoded in words, i.e. scaled by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of the signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be within the range -33554432 to +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details in the ARM ARM, p. A4-11.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ }
+ }
+}
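The constant-pool cases above reduce to a small amount of bit twiddling on the instruction word. A standalone sketch of that computation (the function and parameter names are illustrative, and int64_t stands in for intptr_t):

#include <cstdint>

// Mirrors the reloc_arm_cp_entry / reloc_arm_vfp_cp_entry / reloc_arm_relative
// handling above: form a byte offset from PC+8, fold its sign into the U bit
// (bit 23), scale VFP offsets down to words, and OR the result into the
// instruction word. Setting Rn to PC is done separately in the code above.
static uint32_t applyPCRelFixup(uint32_t Insn, int64_t RelocPos,
                                int64_t Target, bool IsVFP) {
  int64_t Offset = Target - (RelocPos + 8);  // The PC reads as the instruction + 8.
  if (Offset >= 0) {
    Insn |= 1u << 23;                        // U = 1: add the offset.
  } else {
    Insn &= ~(1u << 23);                     // U = 0: subtract the offset.
    Offset = -Offset;
  }
  if (IsVFP)
    Offset >>= 2;                            // VFP immediates count words, not bytes.
  return Insn | (uint32_t)Offset;
}

The reloc_arm_branch case uses the same PC+8 rule but keeps bits [25:2] of the offset as the signed 24-bit branch immediate instead.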
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 000000000000..2f9792813d32
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,183 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal state for the function being JITted.
+ /// Resize the constant pool id to CONSTPOOL_ENTRY address map and the
+ /// jump table id to jump table base map; remember whether the codegen
+ /// relocation model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inlines all jump tables within
+ /// the text section of the function. This returns the address of the base
+ /// of the jump table with the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol for the
+ /// symbol located at Addr. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it is not already resolved. Constant pool entries must be resolved
+ /// by the ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..c6efea1d7806
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,1839 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+
+/// ARMLoadStoreOpt - A post-register-allocation pass that combines load / store
+/// instructions to form ldm / stm instructions.
+
+namespace {
+ struct ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+ bool isThumb2;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Reg;
+ bool isKill;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ break;
+ case ARM::STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ break;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ break;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ break;
+ case ARM::VLDRS:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRS:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
+ }
+ break;
+ case ARM::VLDRD:
+ ++NumVLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
+ }
+ break;
+ case ARM::VSTRD:
+ ++NumVSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
+ }
+ break;
+ }
+
+ return 0;
+}
+
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+
+ return ARM_AM::bad_am_submode;
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
+static bool isT2i32Load(unsigned Opc) {
+ return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
+}
+
+static bool isi32Load(unsigned Opc) {
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+}
+
+static bool isT2i32Store(unsigned Opc) {
+ return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
+}
+
+static bool isi32Store(unsigned Opc) {
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
+}
+
+/// MergeOps - Create and insert an LDM or STM with Base as the base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill,
+ int Opcode, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned Scratch, DebugLoc dl,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
+ Mode = ARM_AM::ib;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ Mode = ARM_AM::da;
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // Check if this is a supported opcode before we insert instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
+ // If the starting offset isn't zero, insert an instruction to materialize
+ // a new base. But only do so if it is cost effective, i.e. when merging
+ // more than two loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (isi32Load(Opcode))
+ // If it is a load, just use one of the destination registers as the
+ // new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Otherwise, use the scratch register as the new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+ if (Offset < 0) {
+ BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = isThumb2
+ ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ // FIXME: Try t2ADDri12 or t2SUBri12?
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // The new base is always killed right after its use.
+ }
+
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode) return false;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
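Condensed into one place, the submode choice made at the top of MergeOps is the following (an illustrative helper, not part of the pass):

// For NumRegs 4-byte slots whose first element sits at 'Offset' from the base
// register, pick the load/store-multiple addressing submode as above.
static const char *pickSubMode(int Offset, int NumRegs,
                               bool haveIBAndDA, bool isNotVFP) {
  if (Offset == 4 && haveIBAndDA)
    return "ib";                 // base+4 .. base+4*NumRegs
  if (Offset == -4 * NumRegs + 4 && haveIBAndDA)
    return "da";                 // base-4*(NumRegs-1) .. base
  if (Offset == -4 * NumRegs && isNotVFP)
    return "db";                 // base-4*NumRegs .. base-4
  if (Offset == 0)
    return "ia";                 // base .. base+4*(NumRegs-1)
  return "ia, after materializing a new base register";
}

For example, four core-register stores starting at base-16 become a db-mode STMDB, while the same run starting at base becomes an ia-mode STMIA.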
+
+// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
+// success.
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
+ }
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ unsigned Reg = memOps[i].Reg;
+ // If we are inserting the merged operation after an operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ ++Loc;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
+ memOps[j].isKill = false;
+ }
+ memOps[i].isKill = true;
+ }
+ MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
+ memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
+ }
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+void
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned insertAfter = SIndex;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ DebugLoc dl = Loc->getDebugLoc();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(PReg);
+ unsigned Count = 1;
+ unsigned Limit = ~0U;
+
+ // vldm / vstm limits are 32 for S variants, 16 for D variants.
+
+ switch (Opcode) {
+ default: break;
+ case ARM::VSTRS:
+ Limit = 32;
+ break;
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRD:
+ Limit = 16;
+ break;
+ case ARM::VLDRS:
+ Limit = 32;
+ break;
+ }
+
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX
+ : getARMRegisterNumbering(Reg);
+ // Register numbers must be in ascending order. For VFP / NEON load and
+ // store multiples, the registers must also be consecutive and within the
+ // limit on the number of registers per instruction.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isNotVFP && RegNum > PRegNum) ||
+ ((Count < Limit) && RegNum == PRegNum+1))) {
+ Offset += Size;
+ PRegNum = RegNum;
+ ++Count;
+ } else {
+ // Can't merge this in. Try to merge the earlier ones first.
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+ MemOps, Merges);
+ return;
+ }
+
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ return;
+}
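The per-register test inside the loop above can be read as the following predicate (an equivalent reformulation with illustrative names, not code from the pass):

// A memop at NewOffset using register number RegNum extends the current run,
// whose last accepted element is at Offset with register number PRegNum and
// which already holds Count registers, only under these conditions.
static bool extendsRun(bool isSP, int NewOffset, int Offset, int Size,
                       unsigned RegNum, unsigned PRegNum,
                       unsigned Count, unsigned Limit, bool isNotVFP) {
  if (isSP || NewOffset != Offset + Size)
    return false;                                // Must be the next contiguous slot.
  if (isNotVFP)
    return RegNum > PRegNum;                     // Core registers: ascending order.
  return Count < Limit && RegNum == PRegNum + 1; // VFP: consecutive and capped.
}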
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::tSUBspi &&
+ MI->getOpcode() != ARM::SUBri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg){
+ unsigned MyPredReg = 0;
+ if (!MI)
+ return false;
+ if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::tADDspi &&
+ MI->getOpcode() != ARM::ADDri)
+ return false;
+
+ // Make sure the offset fits in 8 bits.
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
+ return false;
+
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ return (MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ break;
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ break;
+ case ARM::VLDMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ break;
+ case ARM::VLDMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ break;
+ case ARM::VSTMSIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ break;
+ case ARM::VSTMDIA:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_PRE;
+ case ARM::STRi12:
+ return ARM::STR_PRE;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_PRE;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_PRE;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
+ switch (Opc) {
+ case ARM::LDRi12:
+ return ARM::LDR_POST;
+ case ARM::STRi12:
+ return ARM::STR_POST;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ return ARM::t2LDR_POST;
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return ARM::t2STR_POST;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
+/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
+/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
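+///
+/// For example (illustrative):
+/// ldr rd, [rn]
+/// rn := rn + 4
+/// =>
+/// ldr rd, [rn], #4      (post-indexed)
+///
+/// rn := rn + 4
+/// ldr rd, [rn]
+/// =>
+/// ldr rd, [rn, #4]!     (pre-indexed)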
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
+
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ // AM2 - 12 bits, thumb2 - 8 bits.
+ unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (!isAM5 &&
+ isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
+ MBB.erase(PrevMBBI);
+ }
+ }
+
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (!isAM5 &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
+ DoMerge = true;
+ }
+ if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ unsigned Offset = 0;
+ if (isAM2)
+ Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ else if (!isAM5)
+ Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+
+ if (isAM5) {
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST,
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2LDR_PRE, t2LDR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // t2STR_PRE, t2STR_POST
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if the instruction is a memory operation that this
+/// pass is capable of operating on.
+static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return MI->getOperand(1).isReg();
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
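+/// getMemoryOpOffset - Return the signed byte offset encoded in MI's memory
+/// operand. Immediate-offset forms return the field directly; AM3/AM5 forms
+/// decode the add/sub bit, and AM5 offsets are additionally scaled by 4.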
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
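+/// InsertLDR_STR - Emit a single load or store with the given operands and
+/// register flags; used by FixInvalidRegPairOp when an LDRD/STRD has to be
+/// split into two single-register memory ops.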
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Offset, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill, bool RegUndef,
+ unsigned BaseReg, bool BaseKill, bool BaseUndef,
+ bool OffKill, bool OffUndef,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isT2) {
+ if (isDef) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
+ .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+}
+
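+/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair does not
+/// satisfy the even/odd pairing constraint: with ascending registers and a
+/// zero offset it becomes an LDM/STM, otherwise it is split into two single
+/// loads/stores.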
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ MachineBasicBlock::iterator NewBBI = MBBI;
+ bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+ bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool EvenUndef = MI->getOperand(0).isUndef();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ bool OddUndef = MI->getOperand(1).isUndef();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ bool BaseUndef = BaseOp.isUndef();
+ bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+ bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg,
+ getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+ .addReg(OddReg,
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ ++NumSTRD2STM;
+ }
+ NewBBI = llvm::prior(MBBI);
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (isLd)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and base register is killed, it may have been
+ // re-defed by the load, make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, false,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, false,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ } else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, EvenUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
+ Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, OddUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+ Pred, PredReg, TII, isT2);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
+ }
+ return false;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+ SmallVector<MachineBasicBlock::iterator,4> Merges;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ int Offset = getMemoryOpOffset(MBBI);
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
+ ++Position;
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register.
+ unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ Merges.clear();
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps, Merges);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+ if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
+ ++NumMerges;
+ NumMerges += Merges.size();
+
+ // Try folding preceding/trailing base inc/dec into those load/store
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ ++NumMerges;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ } else if (NumMemOps == 1) {
+ // Try folding preceding/trailing base inc/dec into the single
+ // load/store.
+ if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+ ++NumMerges;
+ RS->forward(prior(MBBI));
+ }
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If the iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
+/// ("bx lr" or "mov pc, lr") into the preceding stack restore so it
+/// directly restores the value of LR into PC.
+/// ldmfd sp!, {..., lr}
+/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
+/// =>
+/// ldmfd sp!, {..., pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.begin() &&
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
+ MachineInstr *PrevMI = prior(MBBI);
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() != ARM::LR)
+ return false;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
+ PrevMI->setDesc(TII->get(NewOpc));
+ MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+ isThumb2 = AFI->isThumb2Function();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves loads /
+/// stores from consecutive locations closer together to make it more likely
+/// they will be combined later.
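+///
+/// For example (illustrative): given
+///   r1 = ldr [r0]; <unrelated insts>; r2 = ldr [r0, #4]
+/// the second load is moved next to the first so that the pair can be turned
+/// into an ldrd here or an ldm by the post-allocation pass.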
+
+namespace {
+ struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ const TargetData *TD;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ MachineRegisterInfo *MRI;
+ MachineFunction *MF;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred,
+ bool &isT2);
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ MRI = &Fn.getRegInfo();
+ MF = &Fn;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
+ while (++I != E) {
+ if (I->isDebugValue() || MemOps.count(&*I))
+ continue;
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ return false;
+ if (isLd && MCID.mayStore())
+ return false;
+ if (!isLd) {
+ if (MCID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (MCID.mayStore())
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
+ return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
+ }
+ }
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // OK if we are only moving a small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ int &Offset, unsigned &PredReg,
+ ARMCC::CondCodes &Pred,
+ bool &isT2) {
+ // Make sure we're allowed to generate LDRD/STRD.
+ if (!STI->hasV5TEOps())
+ return false;
+
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
+ unsigned Scale = 1;
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDRi12)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STRi12)
+ NewOpc = ARM::STRD;
+ else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ NewOpc = ARM::t2LDRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+ NewOpc = ARM::t2STRDi8;
+ Scale = 4;
+ isT2 = true;
+ } else
+ return false;
+
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !(*Op0->memoperands_begin())->getValue() ||
+ (*Op0->memoperands_begin())->isVolatile())
+ return false;
+
+ unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ const Function *Func = MF->getFunction();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
+ : 8; // Pre-v6 needs 8-byte alignment
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ if (isT2) {
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
+ Offset = OffImm;
+ } else {
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm & (Scale-1)))
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+ }
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ Pred = llvm::getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
+ return true;
+}
+
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
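+ // e.g. loads at offsets 0, 4 and 8 from the same base with no intervening
+ // redefinition of the base form one run; a gap (0, 4, 12) or a change of
+ // opcode ends the run.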
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastOpcode = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune this limit.
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
+
+ // Be conservative: if the instructions are too far apart, don't move them;
+ // we want to limit the increase in register pressure.
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
+ if (DoMove)
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ ++InsertPos;
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ bool isT2 = false;
+ unsigned NewOpc = 0;
+ int Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg,
+ Offset, PredReg, Pred, isT2)) {
+ Ops.pop_back();
+ Ops.pop_back();
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ MRI->constrainRegClass(EvenReg, TRC);
+ MRI->constrainRegClass(OddReg, TRC);
+
+ // Form the pair instruction.
+ if (isLd) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming LDRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg);
+ // FIXME: We're converting from STRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming STRi12s.
+ if (!isT2)
+ MIB.addReg(0);
+ MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isCall() || MCID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opc = MI->getOpcode();
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination seen earlier).
+ // Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
new file mode 100644
index 000000000000..39be3f0e39f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1314 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "ARMInstrInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumb2() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ }
+ bool isTargetDarwin() const {
+ Triple TT(STI.getTargetTriple());
+ Triple::OSType OS = TT.getOS();
+ return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ }
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
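+ /// (An so_imm is an 8-bit value rotated right by twice the 4-bit rotate
+ /// field; e.g. 0x3F0 encodes as immed_8 = 0x3F with rotate_imm = 14,
+ /// giving 0xE3F.)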
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ switch (MI.getOperand(Op).getImm()) {
+ default: assert (0 && "Not a valid rot_imm value!");
+ case 0: return 0;
+ case 8: return 1;
+ case 16: return 2;
+ case 24: return 3;
+ }
+ }
+
+ unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 1;
+ }
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ // NEON Thumb2 data-processing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+/// EncodeAddrModeOpValues - Encode the register and immediate of a
+/// 'reg +/- imm' operand; returns true if the offset is an add, false if it
+/// is a subtract.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = getARMRegisterNumbering(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN)
+ SImm = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch is predicated with a condition other than AL
+/// (always).
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
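+  // The Thumb2 B.W encoding stores J1 = ~(I1 ^ S) and J2 = ~(I2 ^ S), where S
+  // is the sign bit (bit 23 of the 24-bit offset here); rewrite bits 22 and 21
+  // of the value accordingly.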
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
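+  // For example (illustrative): [r1, r4] encodes as Rn = 1, Rm = 4,
+  // i.e. (4 << 3) | 1 = 0x21.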
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
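+  // For example (illustrative): [r3, #-20] encodes as Reg = 3, U = 0,
+  // imm12 = 20, i.e. (3 << 13) | 20 = 0x6014; [r3, #20] also sets the U bit,
+  // giving 0x7014.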
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+// FIXME: This routine assumes that a binary expression will always result in
+// a PCRel expression. In reality, that is only true if one or more of the
+// subexpressions is itself PCRel (i.e. "." in asm or some other PC-relative
+// construct), but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: assert(0 && "Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
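+  // e.g. (illustrative)  movw r0, :lower16:sym  /  movt r0, :upper16:sym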
+ const MCExpr *E = MO.getExpr();
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ MCFixupKind Kind;
+ switch (ARM16Expr->getKind()) {
+ default: assert(0 && "Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind));
+ return 0;
+ }
+
+ llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
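+  // For example (illustrative): an immediate post-index offset of #+8 encodes
+  // as imm12 = 8 with the add bit set, i.e. 8 | (1 << 12) = 0x1008.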
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is either Rs, the amount to shift by, or reg0 in which
+ // case the imm contains the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 1 if reg shift, 0 if imm shift
+ // {6-5} = type
+ // If reg shift:
+ // {11-8} = Rs
+ // {7} = 0
+ // else (imm shift)
+ // {11-7} = imm
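+  // For example (illustrative): "r2, lsl #5" is an immediate shift, so
+  // Rm = 2, type = LSL (000), imm = 5, i.e. (5 << 7) | 2 = 0x282.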
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ Value <<= 4;
+ Value |= getARMRegisterNumbering(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
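+  // For example (illustrative): an offset of +5 yields 0x105 in the low nine
+  // bits (ADD bit set), while -5 yields 0x005.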
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. The lower 5 bits are the lsb of the mask, the upper 5 bits are
+ // the msb of the mask.
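+  // For example (illustrative): an inverted mask of 0xFFFFF00F (field at
+  // lsb = 4, msb = 11) encodes as 4 | (11 << 5) = 0x164.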
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // MSB - 5 bits.
+ uint32_t lsb = MI.getOperand(Op-1).getImm();
+ uint32_t width = MI.getOperand(Op).getImm();
+ uint32_t msb = lsb+width-1;
+ assert (width != 0 && msb < 32 && "Illegal bit width!");
+ return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getSsatBitPosValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // For SSAT instructions, the bit position is encoded as the operand value
+ // decremented by 1.
+ return MI.getOperand(Op).getImm()-1;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
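+  // For example (illustrative): LDM with {r0, r4, r7} encodes the GPR bitfield
+  // 0x0091; VLDM with d8-d11 encodes Vd = 8 and a count of 4 * 2 = 8,
+  // i.e. (8 << 8) | 8 = 0x808.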
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+ bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
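+  // An alignment operand of 16 bytes, for instance, encodes as Align = 0b10
+  // (illustrative; see the switch below).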
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6OneLane32AddressOpValue - Encode an addrmode6 register number
+/// along with the alignment operand for use in VST1 and VLD1 with size 32.
+unsigned ARMMCCodeEmitter::
+getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8:
+ case 16: Align = 0x00; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return MO.getReg();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+
+ int Size;
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
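+  // For example (illustrative): a 32-bit Thumb2 encoding 0xF000D000 is emitted
+  // as the halfword 0xF000 followed by 0xD000.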
+ if (isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
new file mode 100644
index 000000000000..2727ba8c8aa5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ assert(0 && "Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
new file mode 100644
index 000000000000..0a2e883deb1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
@@ -0,0 +1,76 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
new file mode 100644
index 000000000000..7411b599f0fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -0,0 +1,125 @@
+//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower ARM MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMMCExpr.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ switch (MO.getTargetFlags()) {
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+ break;
+ }
+ break;
+ }
+
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+ OutContext);
+ break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+ return MCOperand::CreateExpr(Expr);
+
+}
+
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default:
+ assert(0 && "unknown operand type");
+ return false;
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ }
+ return true;
+}
+
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 000000000000..a36e47da06d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // These two r_type relocations always have a pair following them, and the
+ // r_length bits are used differently. The encoding of r_length is as
+ // follows:
+ // low bit of r_length:
+ //  0 - :lower16: for movw instructions
+ //  1 - :upper16: for movt instructions
+ // high bit of r_length:
+ //  0 - arm instructions
+ //  1 - thumb instructions
+ // The other half of the relocated expression is in the following pair
+ // relocation entry, in the low 16 bits of the r_address field.
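+  // For example, a Thumb2 movt fixup below sets MovtBit = 1 and ThumbBit = 1,
+  // i.e. r_length = 0b11.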
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ report_fatal_error("unknown ARM fixup kind!");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half ||
+ RelocType == macho::RIT_ARM_HalfDifference)
+ return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 000000000000..138f0c262271
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,250 @@
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialize Align, so it must precede it.
+ bool isThumb;
+
+ /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+ /// to determine if function is compiled under Thumb mode, for that use
+ /// 'isThumb'.
+ bool hasThumb2;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
+ /// LRSpilledForFarJump - True if the LR register has been spilled to
+ /// enable a far jump.
+ bool LRSpilledForFarJump;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned PICLabelUId;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
+ /// CPEClones - Track constant pool entry clones created by the Constant
+ /// Island pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ hasThumb2(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ explicit ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
+
+ bool isThumbFunction() const { return isThumb; }
+ bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
+ bool isThumb2Function() const { return isThumb && hasThumb2; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
+ }
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 000000000000..18e162000602
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using NEON instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
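[Editor's note] The table that ends above is generated data: each 32-bit entry packs the lowering decision its comment spells out, that is an operation (copy, vrev, vdup0-3, vext1-3, vuzpl/r, vzipl/r, vtrnl/r), two sub-shuffle ids, and a cost figure. The short sketch below shows how one entry can be unpacked. The field widths, the operation numbering, and the helper names (decodePerfectShuffleEntry, OpNames) are assumptions inferred from the entries themselves, not something this patch defines; note also that the stored two-bit cost field appears to be one less than the "Cost N" printed in the comments for non-copy entries.

// Editor's sketch (not part of the patch): unpack one generated table entry.
// Assumed layout, inferred from the entries above: bits 31-30 cost field,
// bits 29-26 operation, bits 25-13 LHS sub-shuffle id, bits 12-0 RHS id.
#include <cstdint>
#include <cstdio>

static const char *const OpNames[] = {
  "copy",  "vrev",  "vdup0", "vdup1", "vdup2", "vdup3", "vext1", "vext2",
  "vext3", "vuzpl", "vuzpr", "vzipl", "vzipr", "vtrnl", "vtrnr"
};

static void decodePerfectShuffleEntry(uint32_t Entry) {
  unsigned CostField = Entry >> 30;            // top two bits
  unsigned OpNum     = (Entry >> 26) & 0xF;    // operation selector
  unsigned LHSID     = (Entry >> 13) & 0x1FFF; // left sub-shuffle id
  unsigned RHSID     = Entry & 0x1FFF;         // right sub-shuffle id
  std::printf("cost-field=%u op=%s lhs=%u rhs=%u\n",
              CostField, OpNum < 15 ? OpNames[OpNum] : "?", LHSID, RHSID);
}

int main() {
  decodePerfectShuffleEntry(363253046U); // table comment: <7,7,7,7>: Cost 1 vdup3 RHS
  decodePerfectShuffleEntry(835584U);    // table comment: <u,1,2,3>: Cost 0 copy LHS
  return 0;
}

The two 13-bit ids presumably identify the sub-shuffles of the two inputs that feed the operation; the decoder above does not interpret them further.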
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 000000000000..1cba1ba591ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,22 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 000000000000..8edfb9a2057f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,33 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+public:
+ ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif
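[Editor's note] ARMRegisterInfo.h and ARMRegisterInfo.cpp above add only a thin shim: the class contributes no members of its own and simply forwards an ARMBaseInstrInfo reference and the subtarget to ARMBaseRegisterInfo, where the register-info logic shared between the ARM and Thumb variants lives. For orientation, a consumer elsewhere in the backend looks roughly like the sketch below; the ARMInstrInfo wiring, the RI member, and getRegisterInfo() are recalled from the surrounding LLVM sources of this era and are not part of this hunk, so treat them as an assumption rather than as the patch's API.

// Rough sketch of the usual consumer (assumed, not in this patch): the ARM
// instruction-info class owns the register-info shim and hands it the pair
// of arguments the constructor above expects.
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"

namespace llvm {
class ARMInstrInfo : public ARMBaseInstrInfo {
  ARMRegisterInfo RI;               // built from this instr-info + subtarget
public:
  explicit ARMInstrInfo(const ARMSubtarget &STI)
      : ARMBaseInstrInfo(STI), RI(*this, STI) {}
  // Callers reach the ARM-specific register info through the instr info.
  const ARMRegisterInfo &getRegisterInfo() const { return RI; }
};
} // end namespace llvm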
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 000000000000..76eb496bde42
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,332 @@
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<6> num, string n> : Register<n> {
+ field bits<6> Num;
+ let Namespace = "ARM";
+}
+
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the S* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
+}
+
+// VFP3 defines 16 additional double registers
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
+
+// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
+def Q0 : ARMReg< 0, "q0", [D0, D1]>;
+def Q1 : ARMReg< 1, "q1", [D2, D3]>;
+def Q2 : ARMReg< 2, "q2", [D4, D5]>;
+def Q3 : ARMReg< 3, "q3", [D6, D7]>;
+def Q4 : ARMReg< 4, "q4", [D8, D9]>;
+def Q5 : ARMReg< 5, "q5", [D10, D11]>;
+def Q6 : ARMReg< 6, "q6", [D12, D13]>;
+def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
+def Q8 : ARMReg< 8, "q8", [D16, D17]>;
+def Q9 : ARMReg< 9, "q9", [D18, D19]>;
+def Q10 : ARMReg<10, "q10", [D20, D21]>;
+def Q11 : ARMReg<11, "q11", [D22, D23]>;
+def Q12 : ARMReg<12, "q12", [D24, D25]>;
+def Q13 : ARMReg<13, "q13", [D26, D27]>;
+def Q14 : ARMReg<14, "q14", [D28, D29]>;
+def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be a multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
+let SubRegIndices = [qsub_0, qsub_1],
+ CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1],
+ CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
+
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r9 == May be reserved as Thread Register
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviour when SP (R13)
+// or PC (R15) is used. The ARM ISA refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Thumb registers are normally R0-R7. Some instructions can still use
+// the general GPR register class above (e.g., MOV).
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note: getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!). This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// Scalar single precision floating point register class.
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+
+// Subset of SPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+
+// Scalar double precision floating point / generic 64-bit vector register
+// class.
+// ARM requires only word alignment for doubles, but performance is better
+// when they are double-word aligned.
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of DPR that are accessible with VFP2 (and that therefore also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 16)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
+}
+
+// Subset of DPR which can be used as a source of NEON scalars for 16-bit
+// operations
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ (trunc DPR, 8)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
+}
+
+// Generic 128-bit vector register class.
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "Q%u", 0, 15)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QPR that have 32-bit SPR subregs.
+def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 8)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
+}
+
+// Subset of QPR that have DPR_8 and SPR_8 subregs.
+def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc QPR, 4)> {
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 4)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let isAllocatable = 0;
+}
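
The nesting of S, D, and Q registers declared above can be summarized as: for n < 16, Dn aliases {S(2n), S(2n+1)}, and every Qn aliases {D(2n), D(2n+1)}, so only Q0-Q7 reach down to S subregisters. A minimal standalone C++ sketch of that arithmetic (illustration only, not part of the patch; the function name is made up):

#include <cstdio>

// Prints the D (and, where they exist, S) subregisters of each Q register,
// mirroring the SubRegIndices/CompositeIndices declarations above.
static void printQSubRegs(unsigned q) {
  unsigned dLo = 2 * q, dHi = 2 * q + 1;
  std::printf("q%u = {d%u, d%u}", q, dLo, dHi);
  if (dHi < 16) // ssub_* indices exist only for the VFP2 half (Q0-Q7).
    std::printf(" = {s%u, s%u, s%u, s%u}",
                4 * q, 4 * q + 1, 4 * q + 2, 4 * q + 3);
  std::printf("\n");
}

int main() {
  for (unsigned q = 0; q < 16; ++q)
    printQSubRegs(q);
  return 0;
}
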
diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 000000000000..86e7206f2cc6
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,62 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
+ };
+ }
+}
+
+#endif
+
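
The header above only declares the relocation kinds; how they are consumed lives elsewhere in the target code. As a hedged illustration of what the accompanying comments imply, here is a self-contained sketch with a local copy of the enum and a small classifier (this is not LLVM's actual relocation resolver):

#include <cstdio>

// Local copy of the kinds declared in ARMRelocations.h, for this sketch only.
enum RelocationType {
  reloc_arm_absolute, reloc_arm_relative, reloc_arm_cp_entry,
  reloc_arm_vfp_cp_entry, reloc_arm_machine_cp_entry, reloc_arm_jt_base,
  reloc_arm_pic_jt, reloc_arm_branch, reloc_arm_movt, reloc_arm_movw
};

// True for the kinds whose comments above describe them as PC-relative.
static bool isPCRelative(RelocationType RT) {
  switch (RT) {
  case reloc_arm_relative:
  case reloc_arm_cp_entry:
  case reloc_arm_vfp_cp_entry:
  case reloc_arm_jt_base:
    return true;
  default:
    return false;
  }
}

int main() {
  std::printf("reloc_arm_cp_entry PC-relative? %d\n",
              (int)isPCRelative(reloc_arm_cp_entry));
  return 0;
}
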
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 000000000000..958c5c647013
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,261 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALUx : InstrItinClass;
+def IIC_iALUi : InstrItinClass;
+def IIC_iALUr : InstrItinClass;
+def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
+def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
+def IIC_iUNAr : InstrItinClass;
+def IIC_iUNAsi : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
+def IIC_iCMPi : InstrItinClass;
+def IIC_iCMPr : InstrItinClass;
+def IIC_iCMPsi : InstrItinClass;
+def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
+def IIC_iMOVi : InstrItinClass;
+def IIC_iMOVr : InstrItinClass;
+def IIC_iMOVsi : InstrItinClass;
+def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
+def IIC_iCMOVi : InstrItinClass;
+def IIC_iCMOVr : InstrItinClass;
+def IIC_iCMOVsi : InstrItinClass;
+def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
+def IIC_iMUL16 : InstrItinClass;
+def IIC_iMAC16 : InstrItinClass;
+def IIC_iMUL32 : InstrItinClass;
+def IIC_iMAC32 : InstrItinClass;
+def IIC_iMUL64 : InstrItinClass;
+def IIC_iMAC64 : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
+def IIC_Br : InstrItinClass;
+def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA32 : InstrItinClass;
+def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP32 : InstrItinClass;
+def IIC_fpCMP64 : InstrItinClass;
+def IIC_fpCVTSD : InstrItinClass;
+def IIC_fpCVTDS : InstrItinClass;
+def IIC_fpCVTSH : InstrItinClass;
+def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIS : InstrItinClass;
+def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTSI : InstrItinClass;
+def IIC_fpCVTDI : InstrItinClass;
+def IIC_fpMOVIS : InstrItinClass;
+def IIC_fpMOVID : InstrItinClass;
+def IIC_fpMOVSI : InstrItinClass;
+def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU32 : InstrItinClass;
+def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL32 : InstrItinClass;
+def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC32 : InstrItinClass;
+def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpDIV32 : InstrItinClass;
+def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT32 : InstrItinClass;
+def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad32 : InstrItinClass;
+def IIC_fpLoad64 : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore32 : InstrItinClass;
+def IIC_fpStore64 : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
+def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
+def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
+def IIC_VLD4 : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
+def IIC_VUNAD : InstrItinClass;
+def IIC_VUNAQ : InstrItinClass;
+def IIC_VBIND : InstrItinClass;
+def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
+def IIC_VMOVD : InstrItinClass;
+def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
+def IIC_VPERMD : InstrItinClass;
+def IIC_VPERMQ : InstrItinClass;
+def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
+def IIC_VCNTiD : InstrItinClass;
+def IIC_VCNTiQ : InstrItinClass;
+def IIC_VUNAiD : InstrItinClass;
+def IIC_VUNAiQ : InstrItinClass;
+def IIC_VQUNAiD : InstrItinClass;
+def IIC_VQUNAiQ : InstrItinClass;
+def IIC_VBINiD : InstrItinClass;
+def IIC_VBINiQ : InstrItinClass;
+def IIC_VSUBiD : InstrItinClass;
+def IIC_VSUBiQ : InstrItinClass;
+def IIC_VBINi4D : InstrItinClass;
+def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSUBi4D : InstrItinClass;
+def IIC_VSUBi4Q : InstrItinClass;
+def IIC_VABAD : InstrItinClass;
+def IIC_VABAQ : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
+def IIC_VMULi16D : InstrItinClass;
+def IIC_VMULi32D : InstrItinClass;
+def IIC_VMULi16Q : InstrItinClass;
+def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
+
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
new file mode 100644
index 000000000000..8d86c01dc741
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -0,0 +1,1034 @@
+//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A8 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+// Functional Units.
+def A8_Pipe0 : FuncUnit; // pipeline 0
+def A8_Pipe1 : FuncUnit; // pipeline 1
+def A8_LSPipe : FuncUnit; // Load / store pipeline
+def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A8_NLSPipe : FuncUnit; // NEON LS pipe
+//
+// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
+//
+def CortexA8Itineraries : ProcessorItineraries<
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
+ // Two fully-pipelined integer ALU pipelines
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+
+ // Integer multiply pipeline
+ // Result written in E5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NPipe], 0>,
+ InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<7, [A8_NPipe], 0>,
+ InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NPipe], 0>,
+ InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<8, [A8_NPipe], 0>,
+ InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<9, [A8_NPipe], 0>,
+ InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<11, [A8_NPipe], 0>,
+ InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<20, [A8_NPipe], 0>,
+ InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<29, [A8_NPipe], 0>,
+ InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
+ // NEON
+ // Issue through integer pipeline, and execute in NEON unit.
+ //
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2]>,
+ //
+ // Quad-register FP Unary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2]>,
+ //
+ // Double-register FP Binary
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
+ // Quad-register FP Binary
+ // Result written in N5, but that is relative to the last cycle of multicycle,
+ // so we use 6 for those cases
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Quad-register Permute Move
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+ // Result written in N2, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>,
+ InstrStage<1, [A8_NPipe], 0>,
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
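
A further property of the operand-cycle lists above is that late-read source operands shorten dependence chains. Under the simplified assumption that the latency between a producer and a consumer is the producer's def cycle minus the consumer's use cycle, clamped to at least one, the A8 multiply-accumulate entry IIC_iMAC16 [6, 1, 1, 4] lets MACs chained through the accumulator run much closer together than their nominal 6-cycle result latency. A small sketch of that arithmetic (not LLVM's scheduler code; the helper name is made up):

#include <algorithm>
#include <cstdio>

// Assumed simplified semantics: effective dependence latency is
// DefCycle - UseCycle, but never less than 1.
static int chainLatency(int DefCycle, int UseCycle) {
  return std::max(1, DefCycle - UseCycle);
}

int main() {
  // IIC_iMAC16 above: result in cycle 6, accumulator operand read in cycle 4,
  // so a MAC feeding the next MAC's accumulator sees ~2 cycles.
  std::printf("MAC -> MAC (accumulator): %d cycles\n", chainLatency(6, 4));
  // The same result feeding a multiply source operand read in cycle 1
  // (e.g. IIC_iMUL16 above) sees ~5 cycles.
  std::printf("MAC -> MUL (source):      %d cycles\n", chainLatency(6, 1));
  return 0;
}
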
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 000000000000..49fedf63f8bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,1827 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+ // This file defines the itinerary class data for the ARM Cortex-A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+ // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
+ // Technical Reference Manual".
+//
+// Functional units
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN : FuncUnit; // FP register set, NEON side
+
+// Bypasses
+def A9_LdBypass : Bypass;
+
+def CortexA9Itineraries : ProcessorItineraries<
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
+ // Two fully-pipelined integer ALU pipelines
+
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
+
+ // VFP and NEON share the same register file. This means that every VFP
+ // instruction must wait for full completion of any preceding NEON
+ // instruction and vice versa. We model this behavior with two artificial FUs:
+ // DRegsVFP and DRegsN.
+ //
+ // Every VFP instruction:
+ // - Acquires DRegsVFP resource for 1 cycle
+ // - Reserves DRegsN resource for the whole duration (including time to
+ // register file writeback!).
+ // Every NEON instruction does the same but with FUs swapped.
+ //
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit.
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra latency cycles since wbck is 4 cycles
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<5, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<6, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<7, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<16, [A9_DRegsN], 0, Reserved>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<26, [A9_DRegsN], 0, Reserved>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<18, [A9_DRegsN], 0, Reserved>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<33, [A9_DRegsN], 0, Reserved>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // Extra 1 latency cycle since wbck is 2 cycles
+ InstrStage<3, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Load
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Load Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ // FIXME: assumes 2 doubles which requires 2 LS cycles.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ // NEON
+ // VLD1
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
+ //
+ // VLD2
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
+ //
+ // VLD3
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
+ // VLD4
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
+ //
+ // Quad-register Integer Count
+ // Result written in N3, but that is relative to the last cycle of multicycle,
+ // so we use 4 for those cases
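+ // (i.e. with the 2-cycle NPipe stage below, the result is available
+ // 3 + (2 - 1) = 4 cycles after issue, hence the operand latency of 4)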
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
+ //
+ // Integer to Lane Move
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
+ // Double-register FP Unary
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
+ //
+ // Quad-register FP Unary
+  // Result written in N5, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 6 for those cases
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
+ //
+ // Quad-register FP Binary
+  // Result written in N5, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 6 for those cases
+ // FIXME: We're using this itin for many instructions and [2, 2] here is too
+ // optimistic.
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
+ //
+  // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+  // Quad-register FP Multiply-Accumulate
+  // Result written in N9, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 10 for those cases
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
+  // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
+ //
+  // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
+ //
+ // Double-register Permute
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
+ //
+ // Quad-register Permute
+  // Result written in N2, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 3 for those cases
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
+ //
+ // Quad-register Permute (3 cycle issue)
+  // Result written in N2, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 4 for those cases
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 8 cycles
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
+]>;
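+
+// Informal reading of a representative entry above, IIC_VMOVD (Double-register
+// Permute Move): it occupies either issue slot and A9_MUX0 for one cycle,
+// requires A9_DRegsN, reserves A9_DRegsVFP for 7 cycles (the 6-cycle writeback
+// plus one, per the comment on that stage), and executes for one cycle in
+// A9_NPipe. The trailing [2, 1] list gives the per-operand cycles: the result
+// is available after 2 cycles and the source register is read in cycle 1.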
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 000000000000..c1880a72fff3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,294 @@
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Model based on ARM1176
+//
+// Functional Units
+def V6_Pipe : FuncUnit; // pipeline
+
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
+//
+def ARMV6Itineraries : ProcessorItineraries<
+ [V6_Pipe], [], [
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
+
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>,
+
+ // VFP
+  // Issued through the integer pipeline and executed in the NEON unit. We
+  // assume RunFast mode so that the NFP pipeline is used for single-precision
+  // operations when possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
+]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..ef0aaf2a59eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -0,0 +1,198 @@
+//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "ARMTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+}
+
+ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
+}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
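+  // For example, a constant 11-byte copy with 4-byte alignment gives
+  // NumMemOps == 2 and BytesLeft == 3: this loop emits two i32 loads, a
+  // TokenFactor, and two i32 stores, and the trailing-bytes code below then
+  // finishes with an i16 and an i8 load/store pair.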
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+// Adjust parameters for memset: the EABI convention is (ptr, size, value),
+// whereas the GNU library uses (ptr, value, size).
+// See RTABI section 4.3.4.
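+// For illustration, the two calling conventions are:
+//   GNU:   void *memset(void *dest, int c, size_t n);
+//   RTABI: void __aeabi_memset(void *dest, size_t n, int c);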
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const
+{
+  // Use the default lowering for non-AAPCS subtargets.
+ if (!Subtarget->isAAPCS_ABI())
+ return SDValue();
+
+ const ARMTargetLowering &TLI =
+ *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // First argument: data pointer
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ // Second argument: buffer size
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ // Third argument: value to fill
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+
+  // Emit the __aeabi_memset call.
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain,
+ Type::getVoidTy(*DAG.getContext()), // return type
+ false, // return sign ext
+ false, // return zero ext
+ false, // is var arg
+ false, // is in regs
+ 0, // number of fixed arguments
+ TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
+ false, // is tail call
+ false, // is return val used
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), // callee
+ Args, DAG, dl); // arg list, DAG and debug
+
+ return CallResult.second;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
new file mode 100644
index 000000000000..ec1bf5ca1941
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -0,0 +1,51 @@
+//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSELECTIONDAGINFO_H
+#define ARMSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ ~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+  // Adjust parameters for memset; see RTABI section 4.3.4.
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 000000000000..1cab9e44ce75
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,214 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ReserveR9("arm-reserve-r9", cl::Hidden,
+ cl::desc("Reserve R9, making it unavailable as GPR"));
+
+static cl::opt<bool>
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
+
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
+ , ARMProcFamily(Others)
+ , HasV4TOps(false)
+ , HasV5TOps(false)
+ , HasV5TEOps(false)
+ , HasV6Ops(false)
+ , HasV6T2Ops(false)
+ , HasV7Ops(false)
+ , HasVFPv2(false)
+ , HasVFPv3(false)
+ , HasNEON(false)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowFPVMLx(false)
+ , HasVMLxForwarding(false)
+ , SlowFPBrcc(false)
+ , InThumbMode(false)
+ , HasThumb2(false)
+ , NoARM(false)
+ , PostRAScheduler(false)
+ , IsR9Reserved(ReserveR9)
+ , UseMovt(false)
+ , HasFP16(false)
+ , HasD16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , AvoidCPSRPartialUpdate(false)
+ , HasMPExtension(false)
+ , FPOnlySP(false)
+ , AllowsUnalignedMem(false)
+ , Thumb2DSP(false)
+ , stackAlignment(4)
+ , CPUString(CPU)
+ , TargetTriple(TT)
+ , TargetABI(ARM_ABI_APCS) {
+ // Determine default and user specified characteristics
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS;
+ else
+ ArchFS = FS;
+ }
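+  // For example (hypothetical values): ParseARMTriple could yield "+v7" for a
+  // v7 triple, and an explicit FS of "+neon" would then give an ArchFS of
+  // "+v7,+neon" for ParseSubtargetFeatures below.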
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+  // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify an
+  // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2())
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // After parsing Itineraries, set ItinData.IssueWidth.
+ computeIssueWidth();
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (!isTargetDarwin())
+ UseMovt = hasV6T2Ops();
+ else {
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ }
+
+ if (!isThumb() || hasThumb2())
+ PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
+bool
+ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
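+  // Note that available_externally definitions are also treated as
+  // declarations: their body is not guaranteed to be emitted in this module.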
+
+ if (!isTargetDarwin()) {
+    // An extra load is needed for all externally visible globals.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else {
+ if (RelocM == Reloc::PIC_) {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage())
+ // Hidden $non_lazy_ptr reference.
+ return true;
+
+ return false;
+ } else {
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ // If we have a reasonable estimate of the pipeline depth, then we can
+ // estimate the penalty of a misprediction based on that.
+ if (isCortexA8())
+ return 13;
+ else if (isCortexA9())
+ return 8;
+
+ // Otherwise, just return a sensible default.
+ return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+ unsigned allStage1Units = 0;
+ for (const InstrItinerary *itin = InstrItins.Itineraries;
+ itin->FirstStage != ~0U; ++itin) {
+ const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+ allStage1Units |= IS->getUnits();
+ }
+ InstrItins.IssueWidth = 0;
+ while (allStage1Units) {
+ ++InstrItins.IssueWidth;
+ // clear the lowest bit
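+    // (x & ~(x - 1)) isolates the lowest set bit, so XOR-ing it back out
+    // clears exactly that bit from allStage1Units.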
+ allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+ }
+ assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 000000000000..c6508723a576
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,252 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class ARMSubtarget : public ARMGenSubtargetInfo {
+protected:
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+  /// Specify whether the target supports specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+ /// supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasNEON;
+
+  /// UseNEONForSinglePrecisionFP - True if the NEONFP attribute has been
+  /// specified. Use the method useNEONForSinglePrecisionFP() to
+  /// determine if NEON should actually be used.
+ bool UseNEONForSinglePrecisionFP;
+
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
+
+  /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+  /// forwarding that allows mul + mla to be issued back to back.
+ bool HasVMLxForwarding;
+
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
+
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
+
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
+ /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+ /// only so far)
+ bool HasFP16;
+
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+ /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+  /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+  /// that partially update CPSR and add a false dependency on the previous
+  /// CPSR-setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+  /// Selected instruction itineraries (one entry per itinerary class).
+ InstrItineraryData InstrItins;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+ // Change this once Thumb1 ldmia / stmia support is added.
+ return isThumb1Only() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ void computeIssueWidth();
+
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
+
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
+ bool hasARMOps() const { return !NoARM; }
+
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasNEON() const { return HasNEON; }
+ bool useNEONForSinglePrecisionFP() const {
+ return hasNEON() && UseNEONForSinglePrecisionFP; }
+
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasMPExtension() const { return HasMPExtension; }
+ bool hasThumb2DSP() const { return Thumb2DSP; }
+
+ bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetELF() const { return !isTargetDarwin(); }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
+
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ unsigned getMispredictionPenalty() const;
+
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
+ /// symbol.
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 000000000000..f0b176ad6981
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,189 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMFrameLowering.h"
+#include "ARM.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeARMTarget() {
+ // Register the target.
+ RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
+ RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ JITInfo(),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ DefRelocModel = getRelocationModel();
+
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
+}
+
+ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : ARMBaseTargetMachine(T, TT, CPU, FS),
+ InstrInfo(Subtarget.hasThumb2()
+ ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
+ : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
+ ELFWriterInfo(*this),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+}
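+
+// A rough legend for the datalayout strings above: "e" selects little-endian,
+// "p:32:32" gives 32-bit pointers with 32-bit alignment, entries such as
+// "f64:32:64" (APCS) vs. "f64:64:64" (AAPCS) capture the differing ABI and
+// preferred alignment of 64-bit types, and "n32" marks 32 bits as the native
+// integer width.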
+
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+
+ return false;
+}
+
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this, OptLevel));
+ return false;
+}
+
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ PM.add(createMLxExpansionPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb1.
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createARMLoadStoreOptimizationPass());
+ if (Subtarget.hasNEON())
+ PM.add(createNEONMoveFixPass());
+ }
+
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ if (!Subtarget.isThumb1Only())
+ PM.add(createIfConverterPass());
+ }
+ if (Subtarget.isThumb2())
+ PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 000000000000..bc3d46a50ea5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,143 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "ARMInstrInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
+ ARMSubtarget Subtarget;
+private:
+ ARMJITInfo JITInfo;
+ InstrItineraryData InstrItins;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+public:
+ ARMBaseTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &MCE);
+};
+
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+ ARMInstrInfo InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ ARMFrameLowering FrameLowering;
+ public:
+ ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+/// Thumb-1 and Thumb-2.
+///
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
+ ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
+public:
+ ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
+ virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+ virtual const ARMBaseInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
new file mode 100644
index 000000000000..19defa1b5196
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -0,0 +1,47 @@
+//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF Target
+//===----------------------------------------------------------------------===//
+
+void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ StaticCtorSection =
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ LSDASection = NULL;
+ }
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+}
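// The Initialize() override above only switches the static constructor and
// destructor sections when the subtarget uses the AAPCS ABI; otherwise the
// defaults set up by TargetLoweringObjectFileELF::Initialize() are kept
// (conventionally the .ctors/.dtors scheme). A minimal standalone sketch of
// that decision, in plain C++ with illustrative names (not LLVM APIs):
#include <string>

struct StaticInitSections {
  std::string Ctor; // section holding static constructor pointers
  std::string Dtor; // section holding static destructor pointers
};

inline StaticInitSections pickStaticInitSections(bool IsAAPCS_ABI) {
  StaticInitSections S;
  if (IsAAPCS_ABI) {
    // AAPCS targets use the newer .init_array/.fini_array scheme.
    S.Ctor = ".init_array";
    S.Dtor = ".fini_array";
  } else {
    // Assumed fallback: keep the traditional .ctors/.dtors sections.
    S.Ctor = ".ctors";
    S.Dtor = ".dtors";
  }
  return S;
}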
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
new file mode 100644
index 000000000000..c6a7261439d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -0,0 +1,38 @@
+//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class MCContext;
+class TargetMachine;
+
+class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
+public:
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
new file mode 100644
index 000000000000..d9a5fa223b4b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -0,0 +1,154 @@
+//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+protected:
+ typedef std::map<std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+ ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("arm-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+ ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("thumb-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+} // end anonymous namespace
+
+AsmToken ARMBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default: break;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ break;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+ // Check for register aliases.
+ // r13 -> sp
+ // r14 -> lr
+ // r15 -> pc
+ // ip -> r12
+ // FIXME: Some assemblers support lots of others. Do we want them all?
+ if (!regID) {
+ regID = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ }
+ }
+
+ return AsmToken(lexedToken);
+}
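// LexTokenUAL() above resolves register identifiers in two steps: first the
// tablegen-built RegisterMap, then the hard-coded aliases r13/r14/r15/ip. A
// minimal standalone sketch of that lookup order, in plain C++ (the enum
// values are placeholders, not the real ARM register numbers):
#include <map>
#include <string>

enum SketchReg { NoReg = 0, SketchSP = 101, SketchLR = 102,
                 SketchPC = 103, SketchR12 = 104 };

inline unsigned lookupRegister(const std::map<std::string, unsigned> &RegMap,
                               const std::string &LowerName) {
  // Step 1: exact architectural name, as filled in by InitRegisterMap().
  std::map<std::string, unsigned>::const_iterator I = RegMap.find(LowerName);
  if (I != RegMap.end())
    return I->second;
  // Step 2: the alias fallbacks handled by the StringSwitch above.
  if (LowerName == "r13") return SketchSP;
  if (LowerName == "r14") return SketchLR;
  if (LowerName == "r15") return SketchPC;
  if (LowerName == "ip")  return SketchR12;
  return NoReg; // not a register name
}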
+
+extern "C" void LLVMInitializeARMAsmLexer() {
+ RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
new file mode 100644
index 000000000000..a4741270c7a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -0,0 +1,2390 @@
+//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMOperand;
+
+class ARMAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ int TryParseRegister();
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
+ ARMII::AddrMode AddrMode);
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+ const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
+
+
+ bool ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ARM_AM::ShiftOpc &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E);
+ bool ParseShift(enum ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveThumb(SMLoc L);
+ bool ParseDirectiveThumbFunc(SMLoc L);
+ bool ParseDirectiveCode(SMLoc L);
+ bool ParseDirectiveSyntax(SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+ void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
+
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumbOne() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
+
+ /// }
+
+ OperandMatchResultTy tryParseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode2Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode3Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+
+ // Asm Match Converter Methods
+ bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
+public:
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+namespace {
+
+/// ARMOperand - Instances of this class represent a parsed ARM machine
+/// instruction.
+class ARMOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ CondCode,
+ CCOut,
+ CoprocNum,
+ CoprocReg,
+ Immediate,
+ MemBarrierOpt,
+ Memory,
+ MSRMask,
+ ProcIFlags,
+ Register,
+ RegisterList,
+ DPRRegisterList,
+ SPRRegisterList,
+ ShiftedRegister,
+ Shifter,
+ Token
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
+
+ union {
+ struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
+ ARM_PROC::IFlags Val;
+ } IFlags;
+
+ struct {
+ unsigned Val;
+ } MMask;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ /// Combined record for all forms of ARM address expressions.
+ struct {
+ ARMII::AddrMode AddrMode;
+ unsigned BaseRegNum;
+ union {
+ unsigned RegNum; ///< Offset register num, when OffsetIsReg.
+ const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+ } Offset;
+ const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
+ enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
+ unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+ unsigned Preindexed : 1;
+ unsigned Postindexed : 1;
+ unsigned OffsetIsReg : 1;
+ unsigned Negative : 1; // only used when OffsetIsReg is true
+ unsigned Writeback : 1;
+ } Mem;
+
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned Imm;
+ } Shift;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ } ShiftedReg;
+ };
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case CCOut:
+ case Register:
+ Reg = o.Reg;
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case CoprocNum:
+ case CoprocReg:
+ Cop = o.Cop;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case MSRMask:
+ MMask = o.MMask;
+ break;
+ case ProcIFlags:
+ IFlags = o.IFlags;
+ break;
+ case Shifter:
+ Shift = o.Shift;
+ break;
+ case ShiftedRegister:
+ ShiftedReg = o.ShiftedReg;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
+ unsigned getCoproc() const {
+ assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == Register || Kind == CCOut) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == RegisterList || Kind == DPRRegisterList ||
+ Kind == SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
+
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ /// @name Memory Operand Accessors
+ /// @{
+ ARMII::AddrMode getMemAddrMode() const {
+ return Mem.AddrMode;
+ }
+ unsigned getMemBaseRegNum() const {
+ return Mem.BaseRegNum;
+ }
+ unsigned getMemOffsetRegNum() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.RegNum;
+ }
+ const MCExpr *getMemOffset() const {
+ assert(!Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.Value;
+ }
+ unsigned getMemOffsetRegShifted() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.OffsetRegShifted;
+ }
+ const MCExpr *getMemShiftAmount() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftAmount;
+ }
+ enum ARM_AM::ShiftOpc getMemShiftType() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftType;
+ }
+ bool getMemPreindexed() const { return Mem.Preindexed; }
+ bool getMemPostindexed() const { return Mem.Postindexed; }
+ bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
+ bool getMemNegative() const { return Mem.Negative; }
+ bool getMemWriteback() const { return Mem.Writeback; }
+
+ /// @}
+
+ bool isCoprocNum() const { return Kind == CoprocNum; }
+ bool isCoprocReg() const { return Kind == CoprocReg; }
+ bool isCondCode() const { return Kind == CondCode; }
+ bool isCCOut() const { return Kind == CCOut; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isImm0_255() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_7() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_65535() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isT2SOImm() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
+ bool isReg() const { return Kind == Register; }
+ bool isRegList() const { return Kind == RegisterList; }
+ bool isDPRRegList() const { return Kind == DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == SPRRegisterList; }
+ bool isToken() const { return Kind == Token; }
+ bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
+ bool isMemory() const { return Kind == Memory; }
+ bool isShifter() const { return Kind == Shifter; }
+ bool isShiftedReg() const { return Kind == ShiftedRegister; }
+ bool isMemMode2() const {
+ if (getMemAddrMode() != ARMII::AddrMode2)
+ return false;
+
+ if (getMemOffsetIsReg())
+ return true;
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset must fit in imm12: -4095 to 4095 (the sign is encoded separately).
+ if (Value > 4095 || Value < -4095)
+ return false;
+
+ return true;
+ }
+ bool isMemMode3() const {
+ if (getMemAddrMode() != ARMII::AddrMode3)
+ return false;
+
+ if (getMemOffsetIsReg()) {
+ if (getMemOffsetRegShifted())
+ return false; // No shift with offset reg allowed
+ return true;
+ }
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset must fit in imm8: -255 to 255 (the sign is encoded separately).
+ if (Value > 255 || Value < -255)
+ return false;
+
+ return true;
+ }
+ bool isMemMode5() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
+ getMemNegative())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range -1020 to 1020.
+ int64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
+ }
+ bool isMemMode7() const {
+ if (!isMemory() ||
+ getMemPreindexed() ||
+ getMemPostindexed() ||
+ getMemOffsetIsReg() ||
+ getMemNegative() ||
+ getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ if (CE->getValue())
+ return false;
+
+ return true;
+ }
+ bool isMemModeRegThumb() const {
+ if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ return false;
+ return true;
+ }
+ bool isMemModeImmThumb() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range 0-124.
+ uint64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 124);
+ }
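// The isImm0_255/isImm0_7/isImm0_15/isImm0_65535/isMemMode5/isMemModeImmThumb
// predicates above all follow one pattern: the expression must fold to a
// constant, and that constant must lie in a fixed range, sometimes with an
// alignment requirement. A compact standalone restatement of the numeric part
// of that check (plain C++; fitsImmediate is an illustrative helper, not an
// LLVM API):
inline bool fitsImmediate(long long Value, long long Min, long long Max,
                          long long AlignTo = 1) {
  if (Value < Min || Value > Max)
    return false;                 // outside the encodable range
  return (Value % AlignTo) == 0;  // e.g. mode 5 offsets must be multiples of 4
}

// Usage mirroring the predicates above:
//   fitsImmediate(V, 0, 255)          -> isImm0_255
//   fitsImmediate(V, -1020, 1020, 4)  -> isMemMode5 offset
//   fitsImmediate(V, 0, 124, 4)       -> isMemModeImmThumb offset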
+ bool isMSRMask() const { return Kind == MSRMask; }
+ bool isProcIFlags() const { return Kind == ProcIFlags; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!");
+ assert((ShiftedReg.ShiftReg == 0 ||
+ ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) &&
+ "Invalid shifted register operand!");
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+ }
+
+ void addShifterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemMode7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isMemMode7() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ (void)CE;
+ assert((CE && CE->getValue() == 0) &&
+ "No offset operand support in mode 7");
+ }
+
+ void addMemMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode2() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
+ int64_t ShiftAmount = 0;
+
+ if (getMemOffsetRegShifted()) {
+ ShOpc = getMemShiftType();
+ const MCConstantExpr *CE =
+ dyn_cast<MCConstantExpr>(getMemShiftAmount());
+ ShiftAmount = CE->getValue();
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
+ ShOpc, IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 2 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
+ Offset, ARM_AM::no_shift, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
+ -Offset, ARM_AM::no_shift, IdxMode)));
+ }
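// addMemMode2Operands() above never emits a negative immediate directly: a
// signed offset is split into an add/sub opcode plus a non-negative magnitude
// before ARM_AM::getAM2Opc() packs it. A standalone sketch of just that split
// (plain C++; the struct and function are illustrative, not LLVM APIs):
struct SplitOffset {
  bool IsSub;          // true -> subtract the magnitude from the base register
  long long Magnitude; // always >= 0, ready for the imm12 field
};

inline SplitOffset splitSignedOffset(long long Offset) {
  SplitOffset S;
  S.IsSub = (Offset < 0);
  S.Magnitude = S.IsSub ? -Offset : Offset;
  return S;
}
// splitSignedOffset(-8) -> { IsSub = true,  Magnitude = 8  }
// splitSignedOffset(12) -> { IsSub = false, Magnitude = 12 }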
+
+ void addMemMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode3() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
+ IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 3 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
+ Offset, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
+ -Offset, IdxMode)));
+ }
+
+ void addMemMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 5 offset operand!");
+
+ // The MCInst offset operand doesn't include the low two bits (like
+ // the instruction encoding).
+ int64_t Offset = CE->getValue() / 4;
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
+ Offset)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
+ -Offset)));
+ }
+
+ void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ }
+
+ void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode offset operand!");
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CondCode);
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(ShiftedRegister);
+ Op->ShiftedReg.ShiftTy = ShTy;
+ Op->ShiftedReg.SrcReg = SrcReg;
+ Op->ShiftedReg.ShiftReg = ShiftReg;
+ Op->ShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Shifter);
+ Op->Shift.ShiftTy = ShTy;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = RegisterList;
+
+ if (ARM::DPRRegClass.contains(Regs.front().first))
+ Kind = DPRRegisterList;
+ else if (ARM::SPRRegClass.contains(Regs.front().first))
+ Kind = SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
+ bool OffsetIsReg, const MCExpr *Offset,
+ int OffsetRegNum, bool OffsetRegShifted,
+ enum ARM_AM::ShiftOpc ShiftType,
+ const MCExpr *ShiftAmount, bool Preindexed,
+ bool Postindexed, bool Negative, bool Writeback,
+ SMLoc S, SMLoc E) {
+ assert((OffsetRegNum == -1 || OffsetIsReg) &&
+ "OffsetRegNum must imply OffsetIsReg!");
+ assert((!OffsetRegShifted || OffsetIsReg) &&
+ "OffsetRegShifted must imply OffsetIsReg!");
+ assert((Offset || OffsetIsReg) &&
+ "Offset must exists unless register offset is used!");
+ assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
+ "Cannot have shift amount without shifted register offset!");
+ assert((!Offset || !OffsetIsReg) &&
+ "Cannot have expression offset and register offset!");
+
+ ARMOperand *Op = new ARMOperand(Memory);
+ Op->Mem.AddrMode = AddrMode;
+ Op->Mem.BaseRegNum = BaseRegNum;
+ Op->Mem.OffsetIsReg = OffsetIsReg;
+ if (OffsetIsReg)
+ Op->Mem.Offset.RegNum = OffsetRegNum;
+ else
+ Op->Mem.Offset.Value = Offset;
+ Op->Mem.OffsetRegShifted = OffsetRegShifted;
+ Op->Mem.ShiftType = ShiftType;
+ Op->Mem.ShiftAmount = ShiftAmount;
+ Op->Mem.Preindexed = Preindexed;
+ Op->Mem.Postindexed = Postindexed;
+ Op->Mem.Negative = Negative;
+ Op->Mem.Writeback = Writeback;
+
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void ARMOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
+ case Memory:
+ OS << "<memory "
+ << "am:" << ARMII::AddrModeToString(getMemAddrMode())
+ << " base:" << getMemBaseRegNum();
+ if (getMemOffsetIsReg()) {
+ OS << " offset:<register " << getMemOffsetRegNum();
+ if (getMemOffsetRegShifted()) {
+ OS << " offset-shift-type:" << getMemShiftType();
+ OS << " offset-shift-amount:" << *getMemShiftAmount();
+ }
+ } else {
+ OS << " offset:" << *getMemOffset();
+ }
+ if (getMemOffsetIsReg())
+ OS << " (offset-is-reg)";
+ if (getMemPreindexed())
+ OS << " (pre-indexed)";
+ if (getMemPostindexed())
+ OS << " (post-indexed)";
+ if (getMemNegative())
+ OS << " (negative)";
+ if (getMemWriteback())
+ OS << " (writeback)";
+ OS << ">";
+ break;
+ case ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
+ break;
+ }
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Shifter:
+ OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+ break;
+ case ShiftedRegister:
+ OS << "<so_reg"
+ << ShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm))
+ << ", " << ShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm)
+ << ">";
+ break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = TryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
+/// Try to parse a register name. The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::TryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+ if (!RegNum) return -1;
+
+ Parser.Lex(); // Eat identifier token.
+ return RegNum;
+}
+
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::TryParseShiftRegister(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX Doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = 0;
+ if (getParser().ParseExpression(ShiftExpr)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ ShiftReg = TryParseRegister();
+ SMLoc L = Parser.getTok().getLoc();
+ if (ShiftReg == -1) {
+ Error (L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error (Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, Parser.getTok().getLoc()));
+
+ return 0;
+}
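// The immediate range check inside TryParseShiftRegister() above is the ARM
// shifter rule: lsl and ror accept 0-31, lsr and asr accept 0-32 (with 32
// encoded as 0 in the instruction), and rrx takes no amount at all. A
// standalone restatement of that rule (plain C++; ShiftKind is a local
// placeholder for ARM_AM::ShiftOpc):
enum ShiftKind { SK_Lsl, SK_Lsr, SK_Asr, SK_Ror, SK_Rrx };

inline bool isValidShiftAmount(ShiftKind Kind, long long Imm) {
  switch (Kind) {
  case SK_Rrx:
    return Imm == 0;               // rrx never carries an explicit amount
  case SK_Lsl:
  case SK_Ror:
    return Imm >= 0 && Imm <= 31;
  case SK_Lsr:
  case SK_Asr:
    return Imm >= 0 && Imm <= 32;
  }
  return false; // unreachable, keeps compilers quiet
}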
+
+
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created; another AsmOperand is created
+/// if there is a "writeback" marker. Returns 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = TryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to match a symbolic coprocessor operand
+/// name such as "p1", "p7", "c3", "c5", ... Returns the coprocessor number,
+/// or -1 if the name does not match.
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: break;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ break;
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
+ }
+ break;
+ }
+
+ return -1;
+}
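// MatchCoprocessorOperandName() above hand-rolls the digit switch for speed.
// The same acceptance rule ("p0".."p15" or "c0".."c15") written with ordinary
// standard-library code, returning -1 on failure just like the function above
// (illustrative only, not an LLVM API):
#include <cctype>
#include <string>

inline int matchCoprocName(const std::string &Name, char Prefix) {
  if (Name.size() < 2 || Name.size() > 3 || Name[0] != Prefix)
    return -1;
  int Value = 0;
  for (std::string::size_type i = 1; i < Name.size(); ++i) {
    if (!std::isdigit(static_cast<unsigned char>(Name[i])))
      return -1;
    Value = Value * 10 + (Name[i] - '0');
  }
  // The original three-character case only accepts a leading '1', so forms
  // like "p07" are rejected here as well.
  if (Name.size() == 3 && Name[1] != '1')
    return -1;
  return (Value <= 15) ? Value : -1;
}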
+
+/// tryParseCoprocNumOperand - Try to parse a coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseCoprocRegOperand - Try to parse a coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// register, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
+
+/// Parse a register list. Returns false on success and true on error. The
+/// first token must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+
+ // Read the rest of the registers in the list.
+ unsigned PrevRegNum = 0;
+ SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+ do {
+ bool IsRange = Parser.getTok().is(AsmToken::Minus);
+ Parser.Lex(); // Eat non-identifier token.
+
+ const AsmToken &RegTok = Parser.getTok();
+ SMLoc RegLoc = RegTok.getLoc();
+ if (RegTok.isNot(AsmToken::Identifier)) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ int RegNum = TryParseRegister();
+ if (RegNum == -1) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
+
+ if (IsRange) {
+ int Reg = PrevRegNum;
+ do {
+ ++Reg;
+ Registers.push_back(std::make_pair(Reg, RegLoc));
+ } while (Reg != RegNum);
+ } else {
+ Registers.push_back(std::make_pair(RegNum, RegLoc));
+ }
+
+ PrevRegNum = RegNum;
+ } while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus));
+
+ // Process the right curly brace of the list.
+ const AsmToken &RCurlyTok = Parser.getTok();
+ if (RCurlyTok.isNot(AsmToken::RCurly)) {
+ Error(RCurlyTok.getLoc(), "'}' expected");
+ return true;
+ }
+
+ SMLoc E = RCurlyTok.getLoc();
+ Parser.Lex(); // Eat right curly brace token.
+
+ // Verify the register list.
+ SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ RI = Registers.begin(), RE = Registers.end();
+
+ unsigned HighRegNum = getARMRegisterNumbering(RI->first);
+ bool EmittedWarning = false;
+
+ DenseMap<unsigned, bool> RegMap;
+ RegMap[HighRegNum] = true;
+
+ for (++RI; RI != RE; ++RI) {
+ const std::pair<unsigned, SMLoc> &RegInfo = *RI;
+ unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+
+ if (RegMap[Reg]) {
+ Error(RegInfo.second, "register duplicated in register list");
+ return true;
+ }
+
+ if (!EmittedWarning && Reg < HighRegNum)
+ Warning(RegInfo.second,
+ "register not in ascending order in register list");
+
+ RegMap[Reg] = true;
+ HighRegNum = std::max(Reg, HighRegNum);
+ }
+
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+ return false;
+}
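// After collecting the registers, ParseRegisterList() above rejects
// duplicates outright and only warns when the list is not in ascending
// order. A minimal standalone sketch of that validation pass over already
// resolved register numbers (plain C++; the names are illustrative):
#include <set>
#include <vector>

struct RegListCheck {
  bool HasDuplicate; // hard error in the parser above
  bool OutOfOrder;   // only a warning in the parser above
};

inline RegListCheck checkRegisterList(const std::vector<unsigned> &Regs) {
  RegListCheck Result = { false, false };
  std::set<unsigned> Seen;
  unsigned HighSoFar = 0;
  for (std::vector<unsigned>::const_iterator I = Regs.begin(),
         E = Regs.end(); I != E; ++I) {
    if (!Seen.insert(*I).second)
      Result.HasDuplicate = true; // same register listed twice
    if (*I < HighSoFar)
      Result.OutOfOrder = true;   // e.g. {r3, r1}
    else
      HighSoFar = *I;
  }
  return Result;
}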
+
+/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch<unsigned>(OptStr)
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef IFlagsStr = Tok.getString();
+
+ unsigned IFlags = 0;
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef Mask = Tok.getString();
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ StringRef SpecReg = Mask.slice(Start, Next);
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 0; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ Flags = "fc";
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (Flag == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // A special register without flags is equivalent to "fc" flags.
+ if (!FlagsVal)
+ FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
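// tryParseMSRMaskOperand() above packs the mask as four field bits (c=1,
// x=2, s=4, f=8) plus bit 4 to select the SPSR. A standalone sketch of that
// packing for the cpsr/spsr spellings, returning ~0U on malformed input the
// way the parser falls back to NoMatch (plain C++; illustrative only):
#include <string>

inline unsigned encodeMSRMask(const std::string &SpecReg, std::string Flags) {
  if (SpecReg != "cpsr" && SpecReg != "spsr")
    return ~0U;
  if (Flags == "all") // cpsr_all is an alias for cpsr_fc
    Flags = "fc";
  unsigned Mask = 0;
  for (std::string::size_type i = 0; i < Flags.size(); ++i) {
    unsigned Bit;
    switch (Flags[i]) {
    case 'c': Bit = 1; break;
    case 'x': Bit = 2; break;
    case 's': Bit = 4; break;
    case 'f': Bit = 8; break;
    default:  return ~0U; // unknown flag letter
    }
    if (Mask & Bit)
      return ~0U;         // the same letter appeared twice
    Mask |= Bit;
  }
  if (Mask == 0)
    Mask = 0x9;           // bare cpsr/spsr means "fc"
  if (SpecReg == "spsr")
    Mask |= 16;           // bit 4: SPSR rather than CPSR/APSR
  return Mask;
}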
+
+/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode2))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode3))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// Parse an ARM memory expression. Returns false on success and true on
+/// error. The first token must be a '[' when called.
+///
+/// TODO: Only pre-indexed and post-indexed addressing are handled so far;
+/// unindexed with option, etc. are still to do.
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
+ SMLoc S, E;
+ assert(Parser.getTok().is(AsmToken::LBrac) &&
+ "Token is not a Left Bracket");
+ S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const AsmToken &BaseRegTok = Parser.getTok();
+ if (BaseRegTok.isNot(AsmToken::Identifier)) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+ int BaseRegNum = TryParseRegister();
+ if (BaseRegNum == -1) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+
+ // The next token must either be a comma or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ return true;
+
+ bool Preindexed = false;
+ bool Postindexed = false;
+ bool OffsetIsReg = false;
+ bool Negative = false;
+ bool Writeback = false;
+ ARMOperand *WBOp = 0;
+ int OffsetRegNum = -1;
+ bool OffsetRegShifted = false;
+ enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
+ const MCExpr *ShiftAmount = 0;
+ const MCExpr *Offset = 0;
+
+ // First look for preindexed address forms, that is after the "[Rn" we now
+ // have to see if the next token is a comma.
+ if (Tok.is(AsmToken::Comma)) {
+ Preindexed = true;
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+ Offset, OffsetIsReg, OffsetRegNum, E))
+ return true;
+ const AsmToken &RBracTok = Parser.getTok();
+ if (RBracTok.isNot(AsmToken::RBrac)) {
+ Error(RBracTok.getLoc(), "']' expected");
+ return true;
+ }
+ E = RBracTok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ // None of the addrmode3 instructions use "!"
+ if (AddrMode == ARMII::AddrMode3)
+ return true;
+
+ WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc());
+ Writeback = true;
+ Parser.Lex(); // Eat exclaim token
+ } else { // In addressing mode 2, pre-indexed mode always ends with "!"
+ if (AddrMode == ARMII::AddrMode2)
+ Preindexed = false;
+ }
+ } else {
+ // The "[Rn" we have so far was not followed by a comma.
+
+ // If there's anything other than the right brace, this is a post indexing
+ // addressing form.
+ E = Tok.getLoc();
+ Parser.Lex(); // Eat right bracket token.
+
+ const AsmToken &NextTok = Parser.getTok();
+
+ if (NextTok.isNot(AsmToken::EndOfStatement)) {
+ Postindexed = true;
+ Writeback = true;
+
+ if (NextTok.isNot(AsmToken::Comma)) {
+ Error(NextTok.getLoc(), "',' expected");
+ return true;
+ }
+
+ Parser.Lex(); // Eat comma token.
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+ ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
+ E))
+ return true;
+ }
+ }
+
+ // Force Offset to exist if used.
+ if (!OffsetIsReg) {
+ if (!Offset)
+ Offset = MCConstantExpr::Create(0, getContext());
+ } else {
+ if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
+ Error(E, "shift amount not supported");
+ return true;
+ }
+ }
+
+ Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
+ Offset, OffsetRegNum, OffsetRegShifted,
+ ShiftType, ShiftAmount, Preindexed,
+ Postindexed, Negative, Writeback, S, E));
+ if (WBOp)
+ Operands.push_back(WBOp);
+
+ return false;
+}
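For orientation, the following is a minimal illustration of the addressing forms this routine is meant to accept, derived from the comments above and standard GNU ARM syntax. The operand strings are examples only, not part of the patch:

    // Examples of memory operands handled by ParseMemory():
    //   "[r0]"                  base register only
    //   "[r1, #4]"              pre-indexed, immediate offset
    //   "[r1, #4]!"             pre-indexed with writeback
    //   "[r2, r3]"              pre-indexed, register offset
    //   "[r2, -r3, lsl #2]"     negative, shifted register offset
    //   "[r4], #8"              post-indexed, immediate offset
    //   "[r4], r5, lsr #1"      post-indexed, shifted register offset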
+
+/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],".
+/// We will parse the following (where +/- means that a plus or minus is
+/// optional):
+/// +/-Rm
+/// +/-Rm, shift
+/// #offset
+/// Returns false on success; otherwise reports an error and returns true.
+bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
+ bool &OffsetRegShifted,
+ enum ARM_AM::ShiftOpc &ShiftType,
+ const MCExpr *&ShiftAmount,
+ const MCExpr *&Offset,
+ bool &OffsetIsReg,
+ int &OffsetRegNum,
+ SMLoc &E) {
+ Negative = false;
+ OffsetRegShifted = false;
+ OffsetIsReg = false;
+ OffsetRegNum = -1;
+ const AsmToken &NextTok = Parser.getTok();
+ E = NextTok.getLoc();
+ if (NextTok.is(AsmToken::Plus))
+ Parser.Lex(); // Eat plus token.
+ else if (NextTok.is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat minus token
+ }
+ // See if there is a register following the "[Rn," or "[Rn]," we have so far.
+ const AsmToken &OffsetRegTok = Parser.getTok();
+ if (OffsetRegTok.is(AsmToken::Identifier)) {
+ SMLoc CurLoc = OffsetRegTok.getLoc();
+ OffsetRegNum = TryParseRegister();
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ E = CurLoc;
+ }
+ }
+
+ // If we parsed a register as the offset then there can be a shift after that.
+ if (OffsetRegNum != -1) {
+ // Look for a comma then a shift
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat comma token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (ParseShift(ShiftType, ShiftAmount, E))
+ return Error(Tok.getLoc(), "shift expected");
+ OffsetRegShifted = true;
+ }
+ }
+ else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
+ // Look for #offset following the "[Rn," or "[Rn],"
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(Offset))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ }
+ return false;
+}
+
+/// ParseShift parses a shift of one of these two forms:
+/// ( lsl | lsr | asr | ror ) , # shift_amount
+/// rrx
+/// Returns false if it successfully parses a shift, true otherwise.
+bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ St = ARM_AM::lsl;
+ else if (ShiftName == "lsr" || ShiftName == "LSR")
+ St = ARM_AM::lsr;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ St = ARM_AM::asr;
+ else if (ShiftName == "ror" || ShiftName == "ROR")
+ St = ARM_AM::ror;
+ else if (ShiftName == "rrx" || ShiftName == "RRX")
+ St = ARM_AM::rrx;
+ else
+ return true;
+ Parser.Lex(); // Eat shift type token.
+
+ // Rrx stands alone.
+ if (St == ARM_AM::rrx)
+ return false;
+
+ // Otherwise, there must be a '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
+
+ if (getParser().ParseExpression(ShiftAmount))
+ return true;
+
+ return false;
+}
+
+/// Parse an ARM instruction operand. For now this parses the operand regardless
+/// of the mnemonic.
+bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ SMLoc S, E;
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Identifier: {
+ if (!TryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = TryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+
+ // Fall through for the Identifier case that is not a register or a
+ // special name.
+ }
+ case AsmToken::Integer: // things like 1f and 2b as branch targets
+ case AsmToken::Dot: { // . as a branch target
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(IdVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::LBrac:
+ return ParseMemory(Operands);
+ case AsmToken::LCurly:
+ return ParseRegisterList(Operands);
+ case AsmToken::Hash:
+ // #42 -> immediate.
+ // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ const MCExpr *ImmVal;
+ if (getParser().ParseExpression(ImmVal))
+ return true;
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+ return false;
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (ParsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
+ }
+}
+
+// ParsePrefix - Parse an ARM 16-bit relocation expression prefix, i.e.
+// :lower16: or :upper16:.
+bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
+
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
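For context, a brief sketch of how these prefixes typically appear in ARM assembly; the symbol name below is hypothetical and the lines are illustrative only:

    //   movw r0, #:lower16:some_symbol   @ load the low 16 bits of the address
    //   movt r0, #:upper16:some_symbol   @ load the high 16 bits of the address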
+
+const MCExpr *
+ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // to the leftmost symbol.
+ if (Variant == MCSymbolRefExpr::VK_None)
+ return E;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle target expr yet");
+ case MCExpr::Constant:
+ llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+ return 0;
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary:
+ llvm_unreachable("Can't handle unary expressions yet");
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = BE->getRHS();
+ if (!LHS)
+ return 0;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
+ //
+ // FIXME: Would be nice to autogen this.
+ if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+ Mnemonic == "movs" ||
+ Mnemonic == "svc" ||
+ (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+ Mnemonic == "vacge" || Mnemonic == "vcge" ||
+ Mnemonic == "vclt" ||
+ Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" ||
+ (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+ Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+ Mnemonic == "vqdmlal" || Mnemonic == "bics"))
+ return Mnemonic;
+
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-set and so weren't caught above.
+ if (Mnemonic != "adcs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+ Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+ Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+ Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+ // The "cps" instruction can have a interrupt mode operand which is glued into
+ // the mnemonic. Check if this is the case, split it and parse the imod op
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ return Mnemonic;
+}
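A few worked examples of how this splitting behaves, following the logic above (illustrative only, not asserted output of the routine):

    //   "movne"  -> Mnemonic "mov", PredicationCode ARMCC::NE, CarrySetting false
    //   "addseq" -> Mnemonic "add", PredicationCode ARMCC::EQ, CarrySetting true
    //   "cpsie"  -> Mnemonic "cps", ProcessorIMod ARM_PROC::IE
    //   "teq"    -> returned unmodified (listed as a non-predicated form above)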
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
+ Mnemonic == "eor" || Mnemonic == "smlal" ||
+ (Mnemonic == "mov" && !isThumbOne())) {
+ CanAcceptCarrySet = true;
+ } else {
+ CanAcceptCarrySet = false;
+ }
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
+ Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ CanAcceptPredicationCode = false;
+ } else {
+ CanAcceptPredicationCode = true;
+ }
+
+ if (isThumb())
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
+ ProcessorIMod);
+
+ Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // If we had a carry-set on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Head +
+ "' can not set flags, but 's' suffix specified");
+ }
+
+ // Add the carry setting operand, if necessary.
+ //
+ // FIXME: It would be awesome if we could somehow invent a location such that
+ // match errors on this operand would print a nice diagnostic about how the
+ // 's' character in the mnemonic resulted in a CCOut operand.
+ if (CanAcceptCarrySet)
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ NameLoc));
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), NameLoc));
+ } else {
+ // This mnemonic can't ever accept a predication code, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ } else {
+ // This mnemonic can't ever accept an imod, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ StringRef ExtraToken = Name.slice(Start, Next);
+
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return TokError("unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
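To make the resulting operand-list shape concrete, here is roughly what the scheme above would produce for two source lines (a sketch based on the comments, not verified parser output):

    //   "addseq r0, r1, r2" -> Token("add"), CCOut(ARM::CPSR), CondCode(ARMCC::EQ),
    //                          Reg(r0), Reg(r1), Reg(r2)
    //   "add r0, r1, r2"    -> Token("add"), CCOut(0), CondCode(ARMCC::AL),
    //                          Reg(r0), Reg(r1), Reg(r2)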
+
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ MatchResultTy MatchResult, MatchResult2;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult != Match_Success) {
+ // If we get a Match_InvalidOperand it might be some arithmetic instruction
+ // that does not update the condition codes. So try adding a CCOut operand
+ // with a value of reg0.
+ if (MatchResult == Match_InvalidOperand) {
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(0,
+ ((ARMOperand*)Operands[0])->getStartLoc()));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ // If we get a Match_MnemonicFail it might be some arithmetic instruction
+ // that updates the condition codes if it ends in 's'. So see if the
+ // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+ // operand with a value of CPSR.
+ else if (MatchResult == Match_MnemonicFail) {
+ // Get the instruction mnemonic, which is the first token.
+ StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+ if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+ // Remove the 's' from the mnemonic for matching.
+ StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+ SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, NameLoc));
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ }
+ }
+ switch (MatchResult) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+/// ParseDirective parses the ARM-specific directives.
+bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".thumb")
+ return ParseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".thumb_func")
+ return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+ else if (IDVal == ".code")
+ return ParseDirectiveCode(DirectiveID.getLoc());
+ else if (IDVal == ".syntax")
+ return ParseDirectiveSyntax(DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveThumb
+/// ::= .thumb
+bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO: set thumb mode
+ // TODO: tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveThumbFunc
+/// ::= .thumb_func symbol_name
+bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+ const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
+ bool isMachO = MAI.hasSubsectionsViaSymbols();
+ StringRef Name;
+
+ // Darwin asm has the function name after the .thumb_func directive;
+ // ELF doesn't.
+ if (isMachO) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getString();
+ Parser.Lex(); // Consume the identifier token.
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // FIXME: assuming the function name will be on the line following .thumb_func
+ if (!isMachO) {
+ Name = Parser.getTok().getString();
+ }
+
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
+ return false;
+}
+
+/// ParseDirectiveSyntax
+/// ::= .syntax unified | divided
+bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(L, "unexpected token in .syntax directive");
+ StringRef Mode = Tok.getString();
+ if (Mode == "unified" || Mode == "UNIFIED")
+ Parser.Lex();
+ else if (Mode == "divided" || Mode == "DIVIDED")
+ return Error(L, "'.syntax divided' arm asssembly not supported");
+ else
+ return Error(L, "unrecognized syntax mode in .syntax directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ // TODO tell the MC streamer the mode
+ // getParser().getStreamer().Emit???();
+ return false;
+}
+
+/// ParseDirectiveCode
+/// ::= .code 16 | 32
+bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
+ int64_t Val = Parser.getTok().getIntVal();
+ if (Val == 16)
+ Parser.Lex();
+ else if (Val == 32)
+ Parser.Lex();
+ else
+ return Error(L, "invalid operand to .code directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.Lex();
+
+ if (Val == 16) {
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ } else {
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+
+ return false;
+}
+
+extern "C" void LLVMInitializeARMAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeARMAsmParser() {
+ RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ LLVMInitializeARMAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
new file mode 100644
index 000000000000..bdce2c4cf896
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -0,0 +1,589 @@
+//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to implement the public interfaces of ARMDisassembler and
+// ThumbDisassembler, both of which are instances of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassembler.h"
+#include "ARMDisassemblerCore.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenDecoderTables.inc - ARMGenDecoderTables.inc is tblgen'ed by the
+/// ARMDecoderEmitter.cpp TableGen backend. It contains:
+///
+/// o Mappings from opcode to ARM/Thumb instruction format
+///
+/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
+/// for an ARM instruction.
+///
+/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
+/// function for a Thumb instruction.
+///
+#include "ARMGenDecoderTables.inc"
+
+#include "ARMGenEDInfo.inc"
+
+using namespace llvm;
+
+/// showBitVector - Use the raw_ostream to log a diagnostic message describing
+/// the individual bits of the instruction.
+///
+static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
+ // Split the bit position markers across more than one line to fit 80 columns.
+ os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
+ << " 10 9 8 7 6 5 4 3 2 1 0 \n";
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '|';
+ for (unsigned i = 32; i != 0; --i) {
+ if (insn >> (i - 1) & 0x01)
+ os << " 1";
+ else
+ os << " 0";
+ os << (i%4 == 1 ? '|' : ':');
+ }
+ os << '\n';
+ // Split the bit position markers across more than one line to fit 80 columns.
+ os << "---------------------------------------------------------------"
+ << "----------------------------------\n";
+ os << '\n';
+}
+
+/// decodeARMInstruction is a decorator function which tries special cases of
+/// instruction matching before calling the auto-generated decoder function.
+static unsigned decodeARMInstruction(uint32_t &insn) {
+ if (slice(insn, 31, 28) == 15)
+ goto AutoGenedDecoder;
+
+ // Special case processing, if any, goes here....
+
+ // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
+ // The insufficient encoding information of the combined instruction confuses
+ // the decoder wrt BFC/BFI. Therefore, we try to recover here.
+ // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
+ // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
+ if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
+ if (slice(insn, 3, 0) == 15)
+ return ARM::BFC;
+ else
+ return ARM::BFI;
+ }
+
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+ // A1 & A2.
+ // As a result, the decoder fails to decode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+ // As a result, the decoder fails to decode UQADD16 properly.
+ if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
+ return ARM::UQADD16;
+
+ // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
+ // As a result, the decoder fails to decode UMULL properly.
+ if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
+ return ARM::UMULL;
+ }
+
+ // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
+ // As a result, the decoder fails to decode SBFX properly.
+ if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
+ return ARM::SBFX;
+
+ // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
+ // As a result, the decoder fails to decode UBFX properly.
+ if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
+ return ARM::UBFX;
+
+ // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
+ // As a result, the decoder fails to decode SSAT properly.
+ if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
+ return ARM::SSAT;
+
+ // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
+ // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
+ if (slice(insn, 27, 24) == 0) {
+ switch (slice(insn, 21, 20)) {
+ case 2:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::STRHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 3:
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ return ARM::LDRHT;
+ case 13:
+ return ARM::LDRSBT;
+ case 15:
+ return ARM::LDRSHT;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRH;
+ case 3: // Pre-indexed
+ return ARM::STRH_PRE;
+ case 0: // Post-indexed
+ return ARM::STRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRD;
+ case 3: // Pre-indexed
+ return ARM::LDRD_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::STRD;
+ case 3: // Pre-indexed
+ return ARM::STRD_PRE;
+ case 0: // Post-indexed
+ return ARM::STRD_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+ // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
+ // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
+ // properly.
+ if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
+ unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
+ switch (slice(insn, 7, 4)) {
+ case 11:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRH;
+ case 3: // Pre-indexed
+ return ARM::LDRH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 13:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSB;
+ case 3: // Pre-indexed
+ return ARM::LDRSB_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSB_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ case 15:
+ switch (PW) {
+ case 2: // Offset
+ return ARM::LDRSH;
+ case 3: // Pre-indexed
+ return ARM::LDRSH_PRE;
+ case 0: // Post-indexed
+ return ARM::LDRSH_POST;
+ default:
+ break; // fallthrough
+ }
+ break;
+ default:
+ break; // fallthrough
+ }
+ }
+
+AutoGenedDecoder:
+ // Calling the auto-generated decoder function.
+ return decodeInstruction(insn);
+}
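The slice() helper used throughout is declared elsewhere in the disassembler (ARMDisassemblerCore.h); assuming the usual bit-field semantics of slice(insn, hi, lo) extracting bits [hi:lo], a concrete walk-through of the BFC special case above would look like this (values illustrative only):

    //   insn = 0x07C0001F
    //   slice(insn, 31, 28) == 0x0   -> not 15, so no jump to AutoGenedDecoder
    //   slice(insn, 27, 21) == 0x3e  -> matches the BFC/BFI pattern
    //   slice(insn, 6, 4)   == 1
    //   slice(insn, 3, 0)   == 15    -> decodeARMInstruction() returns ARM::BFC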
+
+// Helper function for special case handling of LDR (literal) and friends.
+// See, for example, A6.3.7 Load word: Table A6-18 Load word.
+// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2LDRs: case ARM::t2LDRT:
+ return ARM::t2LDRpci;
+
+ case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
+ case ARM::t2LDRBi12: case ARM::t2LDRBi8:
+ case ARM::t2LDRBs: case ARM::t2LDRBT:
+ return ARM::t2LDRBpci;
+
+ case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
+ case ARM::t2LDRHi12: case ARM::t2LDRHi8:
+ case ARM::t2LDRHs: case ARM::t2LDRHT:
+ return ARM::t2LDRHpci;
+
+ case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBs: case ARM::t2LDRSBT:
+ return ARM::t2LDRSBpci;
+
+ case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHs: case ARM::t2LDRSHT:
+ return ARM::t2LDRSHpci;
+ }
+}
+
+// Helper function for special case handling of PLD (literal) and friends.
+// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2PLDi8: case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ return ARM::t2PLDi12;
+
+ case ARM::t2PLIi8: case ARM::t2PLIs:
+ return ARM::t2PLIi12;
+ }
+}
+
+/// decodeThumbSideEffect is a decorator function which can potentially twiddle
+/// the instruction or morph the returned opcode under Thumb2.
+///
+/// First it checks whether the insn is a NEON or VFP instr; if true, bit
+/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
+/// equivalent instruction and decodeInstruction is called with the transformed
+/// insn.
+///
+/// Next, there is special handling for load byte/halfword/word instructions:
+/// we check whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
+/// Thumb2 instruction. See comments below for further details.
+///
+/// Finally, one last check is made to see whether the insn is a NEON/VFP
+/// instruction, in which case decodeInstruction(insn) is invoked on the
+/// original insn.
+///
+/// Otherwise, decodeThumbInstruction is called with the original insn.
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
+ if (IsThumb2) {
+ uint16_t op1 = slice(insn, 28, 27);
+ uint16_t op2 = slice(insn, 26, 20);
+
+ // A6.3 32-bit Thumb instruction encoding
+ // Table A6-9 32-bit Thumb instruction encoding
+
+ // The coprocessor instructions of interest are transformed to their ARM
+ // equivalents.
+
+ // --------- Transform Begin Marker ---------
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
+ // A7.4 Advanced SIMD data-processing instructions
+ // U bit of Thumb corresponds to Inst{24} of ARM.
+ uint16_t U = slice(op1, 1, 1);
+
+ // Inst{28-24} of ARM = {1,0,0,1,U};
+ uint16_t bits28_24 = 9 << 1 | U;
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 28, 24, bits28_24);
+ return decodeInstruction(insn);
+ }
+
+ if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
+ // A7.7 Advanced SIMD element or structure load/store instructions
+ // Inst{27-24} of Thumb = 0b1001
+ // Inst{27-24} of ARM = 0b0100
+ DEBUG(showBitVector(errs(), insn));
+ setSlice(insn, 27, 24, 4);
+ return decodeInstruction(insn);
+ }
+ // --------- Transform End Marker ---------
+
+ unsigned unmorphed = decodeThumbInstruction(insn);
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
+ // before returning it to our caller.
+ if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2LoadLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
+
+ // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
+ // why we morphed the opcode before returning it to our caller.
+ if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
+ && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2PLDLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
+
+ // One last check for NEON/VFP instructions.
+ if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
+ return decodeInstruction(insn);
+
+ // Fall through.
+ }
+
+ return decodeThumbInstruction(insn);
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool ARMDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ unsigned Opcode = decodeARMInstruction(insn);
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = 4;
+
+ DEBUG({
+ errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
+ if (!Builder)
+ return false;
+
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ return true;
+}
+
+bool ThumbDisassembler::getInstruction(MCInst &MI,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os) const {
+ // The Thumb instruction stream is a sequence of halfwords.
+
+ // This represents the first halfword as well as the machine instruction
+ // passed to decodeThumbInstruction(). For a 16-bit Thumb instruction, the top
+ // halfword of insn is 0x0000; otherwise, the first halfword is moved to the
+ // top half, followed by the second halfword.
+ unsigned insn = 0;
+ // Possible second halfword.
+ uint16_t insn1 = 0;
+
+ // A6.1 Thumb instruction set encoding
+ //
+ // If bits [15:11] of the halfword being decoded take any of the following
+ // values, the halfword is the first halfword of a 32-bit instruction:
+ // o 0b11101
+ // o 0b11110
+ // o 0b11111.
+ //
+ // Otherwise, the halfword is a 16-bit instruction.
+
+ // Read 2 bytes of data first.
+ uint8_t bytes[2];
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a little-endian 16-bit halfword in the stream.
+ insn = (bytes[1] << 8) | bytes[0];
+ unsigned bits15_11 = slice(insn, 15, 11);
+ bool IsThumb2 = false;
+
+ // 32-bit instructions if the bits [15:11] of the halfword match
+ // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, 0b11111 /* 0x1F */ }.
+ if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
+ IsThumb2 = true;
+ if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1)
+ return false;
+ // Encoded as a little-endian 16-bit halfword in the stream.
+ insn1 = (bytes[1] << 8) | bytes[0];
+ insn = (insn << 16 | insn1);
+ }
+
+ // The insn could potentially be bit-twiddled in order to be decoded as an ARM
+ // NEON/VFP opcode. In such case, the modified insn is later disassembled as
+ // an ARM NEON/VFP instruction.
+ //
+ // This is a short-term solution for the lack of encoding bits specified for
+ // the Thumb2 NEON/VFP instructions. The long-term solution could be adding
+ // some infrastructure to have each instruction support more than one
+ // encoding. Which encoding is used would be based on which subtarget the
+ // compiler/disassembler is working with at the time. This would allow the
+ // sharing of the NEON patterns between ARM and Thumb2, as well as potentially
+ // greater sharing between the regular ARM instructions and the 32-bit wide
+ // Thumb2 instructions.
+ unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
+
+ ARMFormat Format = ARMFormats[Opcode];
+ Size = IsThumb2 ? 4 : 2;
+
+ DEBUG({
+ errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+ << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
+ << ")\n";
+ showBitVector(errs(), insn);
+ });
+
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
+ if (!Builder)
+ return false;
+
+ Builder->SetSession(const_cast<Session *>(&SO));
+
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
+ if (!Builder->Build(MI, insn))
+ return false;
+
+ return true;
+}
+
+// A8.6.50
+// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
+static unsigned short CountITSize(unsigned ITMask) {
+ // First count the trailing zeros of the IT mask.
+ unsigned TZ = CountTrailingZeros_32(ITMask);
+ if (TZ > 3) {
+ DEBUG(errs() << "Encoding error: IT Mask '0000'");
+ return 0;
+ }
+ return (4 - TZ);
+}
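Concretely, a few example masks and the IT block sizes the routine above derives from them (per A8.6.50; only the position of the lowest set bit matters):

    //   ITMask 0b1000 -> 1 conditional instruction in the IT block
    //   ITMask 0b0100 -> 2
    //   ITMask 0b0010 -> 3
    //   ITMask 0b0001 -> 4
    //   ITMask 0b0000 -> 0 (encoding error)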
+
+/// Init ITState. Note that at least one bit is always 1 in the mask.
+bool Session::InitIT(unsigned short bits7_0) {
+ ITCounter = CountITSize(slice(bits7_0, 3, 0));
+ if (ITCounter == 0)
+ return false;
+
+ // A8.6.50 IT
+ unsigned short FirstCond = slice(bits7_0, 7, 4);
+ if (FirstCond == 0xF) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
+ return false;
+ }
+ if (FirstCond == 0xE && ITCounter != 1) {
+ DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
+ return false;
+ }
+
+ ITState = bits7_0;
+
+ return true;
+}
+
+/// Update ITState if necessary.
+void Session::UpdateIT() {
+ assert(ITCounter);
+ --ITCounter;
+ if (ITCounter == 0)
+ ITState = 0;
+ else {
+ unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
+ setSlice(ITState, 4, 0, NewITState4_0);
+ }
+}
+
+static MCDisassembler *createARMDisassembler(const Target &T) {
+ return new ARMDisassembler;
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T) {
+ return new ThumbDisassembler;
+}
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
new file mode 100644
index 000000000000..0a74a3866eed
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h
@@ -0,0 +1,99 @@
+//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains the header for ARMDisassembler and ThumbDisassembler, both are
+// subclasses of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLER_H
+#define ARMDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+/// Session - Keep track of the IT Block progression.
+class Session {
+ friend class ARMBasicMCBuilder;
+public:
+ Session() : ITCounter(0), ITState(0) {}
+ ~Session() {}
+ /// InitIT - Initializes ITCounter/ITState.
+ bool InitIT(unsigned short bits7_0);
+ /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
+ void UpdateIT();
+
+private:
+ unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
+ unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.
+};
+
+/// ThumbDisassembler - Thumb disassembler for all ARM platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler() :
+ MCDisassembler(), SO() {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ Session SO;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
new file mode 100644
index 000000000000..d89c80a9d457
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -0,0 +1,3818 @@
+//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code to represent the core concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "ARMDisassemblerCore.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+//#define DEBUG(X) do { X; } while (0)
+
+/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
+/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
+/// operand info for each ARMInsts[i].
+///
+/// Together with an instruction's encoding format, we can take advantage of the
+/// NumOperands and the OpInfo fields of the target instruction description in
+/// the quest to build out the MCOperand list for an MCInst.
+///
+/// The general guideline is that with a known format, the numbers of dst and
+/// src operands are well-known. The dst is built first, followed by the src
+/// operand(s). The operands not yet used at this point are for the Implicit
+/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
+/// defined with two components:
+///
+/// def pred { // Operand PredicateOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printPredicateOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops i32imm, CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
+/// }
+///
+/// which is manifested by the MCOperandInfo[] of:
+///
+/// { 0, 0|(1<<MCOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
+///
+/// So the first predicate MCOperand corresponds to the immediate part of the
+/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
+/// corresponds to a register kind of ARM::CPSR.
+///
+/// For the Defs part, in the simple case of only cc_out:$s, we have:
+///
+/// def cc_out { // Operand OptionalDefOperand
+/// ValueType Type = OtherVT;
+/// string PrintMethod = "printSBitModifierOperand";
+/// string AsmOperandLowerMethod = ?;
+/// dag MIOperandInfo = (ops CCR);
+/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
+/// dag DefaultOps = (ops (i32 zero_reg));
+/// }
+///
+/// which is manifested by the one MCOperandInfo of:
+///
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
+///
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+
+using namespace llvm;
+
+const char *ARMUtils::OpcodeName(unsigned Opcode) {
+ return ARMInsts[Opcode].Name;
+}
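A rough sketch of how the per-opcode operand info described in the comment block above is typically consulted when building the MCOperand list. It assumes the tblgen'ed ARMInsts[] table and the standard MCInstrDesc/MCOperandInfo accessors; it is illustrative only and not part of the patch:

    static void walkOperandInfo(unsigned Opcode) {
      const MCInstrDesc &Desc = ARMInsts[Opcode];
      for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
        const MCOperandInfo &Info = Desc.OpInfo[i];
        if (Info.isPredicate()) {
          // pred:$p -> an immediate condition code plus a CCR-class register.
        } else if (Info.isOptionalDef()) {
          // cc_out:$s -> ARM::CPSR when the instruction sets flags, else reg0.
        }
      }
    }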
+
+// Return the register enum based on RegClass and the raw register number.
+// FIXME: Auto-gened?
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
+ if (RegClassID == ARM::rGPRRegClassID) {
+ // Register numbers 13 and 15 are not permitted for many Thumb register
+ // specifiers.
+ if (RawRegister == 13 || RawRegister == 15) {
+ B->SetErr(-1);
+ return 0;
+ }
+ // For this purpose, we can treat rGPR as if it were GPR.
+ RegClassID = ARM::GPRRegClassID;
+ }
+
+ // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+ // A7.3 register encoding
+ // Qd -> bit[12] == 0
+ // Qn -> bit[16] == 0
+ // Qm -> bit[0] == 0
+ //
+ // If one of these bits is 1, the instruction is UNDEFINED.
+ if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
+ B->SetErr(-1);
+ return 0;
+ }
+ unsigned RegNum =
+ RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
+
+ switch (RegNum) {
+ default:
+ break;
+ case 0:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D0;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q0;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0;
+ }
+ break;
+ case 1:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D1;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q1;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1;
+ }
+ break;
+ case 2:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D2;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q2;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2;
+ }
+ break;
+ case 3:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D3;
+ case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ return ARM::Q3;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3;
+ }
+ break;
+ case 4:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D4;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4;
+ }
+ break;
+ case 5:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D5;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5;
+ }
+ break;
+ case 6:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D6;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6;
+ }
+ break;
+ case 7:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7;
+ case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ return ARM::D7;
+ case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7;
+ }
+ break;
+ case 8:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R8;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8;
+ case ARM::QPRRegClassID: return ARM::Q8;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8;
+ }
+ break;
+ case 9:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R9;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9;
+ case ARM::QPRRegClassID: return ARM::Q9;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9;
+ }
+ break;
+ case 10:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R10;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10;
+ case ARM::QPRRegClassID: return ARM::Q10;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10;
+ }
+ break;
+ case 11:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R11;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11;
+ case ARM::QPRRegClassID: return ARM::Q11;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11;
+ }
+ break;
+ case 12:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::R12;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12;
+ case ARM::QPRRegClassID: return ARM::Q12;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12;
+ }
+ break;
+ case 13:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::SP;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13;
+ case ARM::QPRRegClassID: return ARM::Q13;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13;
+ }
+ break;
+ case 14:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::LR;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14;
+ case ARM::QPRRegClassID: return ARM::Q14;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14;
+ }
+ break;
+ case 15:
+ switch (RegClassID) {
+ case ARM::GPRRegClassID: return ARM::PC;
+ case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15;
+ case ARM::QPRRegClassID: return ARM::Q15;
+ case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15;
+ }
+ break;
+ case 16:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D16;
+ case ARM::SPRRegClassID: return ARM::S16;
+ }
+ break;
+ case 17:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D17;
+ case ARM::SPRRegClassID: return ARM::S17;
+ }
+ break;
+ case 18:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D18;
+ case ARM::SPRRegClassID: return ARM::S18;
+ }
+ break;
+ case 19:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D19;
+ case ARM::SPRRegClassID: return ARM::S19;
+ }
+ break;
+ case 20:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D20;
+ case ARM::SPRRegClassID: return ARM::S20;
+ }
+ break;
+ case 21:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D21;
+ case ARM::SPRRegClassID: return ARM::S21;
+ }
+ break;
+ case 22:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D22;
+ case ARM::SPRRegClassID: return ARM::S22;
+ }
+ break;
+ case 23:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D23;
+ case ARM::SPRRegClassID: return ARM::S23;
+ }
+ break;
+ case 24:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D24;
+ case ARM::SPRRegClassID: return ARM::S24;
+ }
+ break;
+ case 25:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D25;
+ case ARM::SPRRegClassID: return ARM::S25;
+ }
+ break;
+ case 26:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D26;
+ case ARM::SPRRegClassID: return ARM::S26;
+ }
+ break;
+ case 27:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D27;
+ case ARM::SPRRegClassID: return ARM::S27;
+ }
+ break;
+ case 28:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D28;
+ case ARM::SPRRegClassID: return ARM::S28;
+ }
+ break;
+ case 29:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D29;
+ case ARM::SPRRegClassID: return ARM::S29;
+ }
+ break;
+ case 30:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D30;
+ case ARM::SPRRegClassID: return ARM::S30;
+ }
+ break;
+ case 31:
+ switch (RegClassID) {
+ case ARM::DPRRegClassID: return ARM::D31;
+ case ARM::SPRRegClassID: return ARM::S31;
+ }
+ break;
+ }
+ DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
+ // Encoding error. Mark the builder with error code != 0.
+ B->SetErr(-1);
+ return 0;
+}
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Extract/Decode Rd: Inst{15-12}.
+static inline unsigned decodeRd(uint32_t insn) {
+ return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rn: Inst{19-16}.
+static inline unsigned decodeRn(uint32_t insn) {
+ return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask;
+}
+
+// Extract/Decode Rm: Inst{3-0}.
+static inline unsigned decodeRm(uint32_t insn) {
+ return (insn & ARMII::GPRRegMask);
+}
+
+// Extract/Decode Rs: Inst{11-8}.
+static inline unsigned decodeRs(uint32_t insn) {
+ return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask;
+}
+
+static inline unsigned getCondField(uint32_t insn) {
+ return (insn >> ARMII::CondShift);
+}
+
+static inline unsigned getIBit(uint32_t insn) {
+ return (insn >> ARMII::I_BitShift) & 1;
+}
+
+static inline unsigned getAM3IBit(uint32_t insn) {
+ return (insn >> ARMII::AM3_I_BitShift) & 1;
+}
+
+static inline unsigned getPBit(uint32_t insn) {
+ return (insn >> ARMII::P_BitShift) & 1;
+}
+
+static inline unsigned getUBit(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 1;
+}
+
+static inline unsigned getPUBits(uint32_t insn) {
+ return (insn >> ARMII::U_BitShift) & 3;
+}
+
+static inline unsigned getSBit(uint32_t insn) {
+ return (insn >> ARMII::S_BitShift) & 1;
+}
+
+static inline unsigned getWBit(uint32_t insn) {
+ return (insn >> ARMII::W_BitShift) & 1;
+}
+
+static inline unsigned getDBit(uint32_t insn) {
+ return (insn >> ARMII::D_BitShift) & 1;
+}
+
+static inline unsigned getNBit(uint32_t insn) {
+ return (insn >> ARMII::N_BitShift) & 1;
+}
+
+static inline unsigned getMBit(uint32_t insn) {
+ return (insn >> ARMII::M_BitShift) & 1;
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.2 Register controlled shifts.
+//
+// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits
+// into llvm enums for shift opcode. API clients should pass in a two-bit
+// encoded value, so the assert remains to flag incorrect API usage.
+//
+// A8-12: DecodeRegShift()
+static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::no_shift;
+ case 0: return ARM_AM::lsl;
+ case 1: return ARM_AM::lsr;
+ case 2: return ARM_AM::asr;
+ case 3: return ARM_AM::ror;
+ }
+}
+
+// See A8.4 Shifts applied to a register.
+// A8.4.1 Constant shifts.
+//
+// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5
+// encodings into the intended ShiftOpc and shift amount.
+//
+// A8-11: DecodeImmShift()
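+// For example, a raw (lsr, 0) pair decodes to LSR #32, and a raw (ror, 0)
+// pair decodes to RRX.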
+static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
+ ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
+}
+
+// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
+// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. API
+// clients should pass in a two-bit encoded value, so the assert remains to
+// flag incorrect API usage.
+static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
+ switch (bits) {
+ default: assert(0 && "No such value"); return ARM_AM::bad_am_submode;
+ case 1: return ARM_AM::ia; // P=0 U=1
+ case 3: return ARM_AM::ib; // P=1 U=1
+ case 0: return ARM_AM::da; // P=0 U=0
+ case 2: return ARM_AM::db; // P=1 U=0
+ }
+}
+
+////////////////////////////////////////////
+// //
+// Disassemble function definitions //
+// //
+////////////////////////////////////////////
+
+/// There is a separate Disassemble*Frm function entry for disassembly of an ARM
+/// instr into a list of MCOperands in the appropriate order, with the possible
+/// dst followed by the possible src(s).
+///
+/// Processing of the predicate operands, and of the 'S' modifier bit when MI
+/// modifies the CPSR, is factored into ARMBasicMCBuilder's method named
+/// TryPredicateAndSBitModifier.
+
+static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ assert(0 && "Unexpected pseudo instruction!");
+ return false;
+}
+
+// A8.6.94 MLA
+// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
+//
+// A8.6.105 MUL
+// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//
+// A8.6.246 UMULL
+// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+// if dHi == dLo then UNPREDICTABLE;
+static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
+ unsigned R19_16 = slice(insn, 19, 16);
+ unsigned R15_12 = slice(insn, 15, 12);
+ unsigned R11_8 = slice(insn, 11, 8);
+ unsigned R3_0 = slice(insn, 3, 0);
+ switch (Opcode) {
+ default:
+ // Did we miss an opcode?
+ DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!\n");
+ return false;
+ case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
+ case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
+ case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
+ case ARM::USADA8:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
+ case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
+ case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
+ // A8.6.167 SMLAD & A8.6.172 SMLSD
+ case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
+ case ARM::USAD8:
+ if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
+ case ARM::UMULL:
+ case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
+ case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ if (R19_16 == R15_12)
+ return true;
+ return false;
+ }
+}
+
+// Multiply Instructions.
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
+// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
+// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
+// only for {d, n, m}.
+//
+// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
+// SMUADX, and USAD8 (for convenience):
+// Rd{19-16} Rn{3-0} Rm{11-8}
+//
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
+// SMLALD, SMLALDX, SMLSLD, SMLSLDX:
+// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
+//
+// The mapping of the multiply registers to the "regular" ARM registers, where
+// there are convenience decoder functions, is:
+//
+// Inst{15-12} => Rd
+// Inst{19-16} => Rn
+// Inst{3-0} => Rm
+// Inst{11-8} => Rs
+static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm");
+ assert(NumOps >= 3
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass == ARM::GPRRegClassID
+ && "Expect three register operands");
+
+ // Sanity check for the register encodings.
+ if (BadRegsMulFrm(Opcode, insn))
+ return false;
+
+ // Instructions with two destination registers have RdLo{15-12} first.
+ if (NumDefs == 2) {
+ assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
+ "Expect 4th register operand");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // The destination register: RdHi{19-16} or Rd{19-16}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ // The two src registers: Rn{3-0}, then Rm{11-8}.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ OpIdx += 3;
+
+ // Many multiply instructions (e.g., MLA) have three src registers.
+ // The third register operand is Ra{15-12}.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Helper routines for disassembly of coprocessor instructions.
+
+static bool LdStCopOpcode(unsigned Opcode) {
+ if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) ||
+ (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE))
+ return true;
+ return false;
+}
+static bool CoprocessorOpcode(unsigned Opcode) {
+ if (LdStCopOpcode(Opcode))
+ return true;
+
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::CDP: case ARM::CDP2:
+ case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2:
+ case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2:
+ return true;
+ }
+}
+static inline unsigned GetCoprocessor(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
+static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) {
+ return CDP ? slice(insn, 23, 20) : slice(insn, 23, 21);
+}
+static inline unsigned GetCopOpc2(uint32_t insn) {
+ return slice(insn, 7, 5);
+}
+static inline unsigned GetCopOpc(uint32_t insn) {
+ return slice(insn, 7, 4);
+}
+// Most of the operands are in immediate forms, except Rd and Rn, which are ARM
+// core registers.
+//
+// CDP, CDP2: cop opc1 CRd CRn CRm opc2
+//
+// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2
+//
+// MCRR, MCRR2, MRRC, MRRC2: cop opc Rd Rn CRm
+//
+// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00
+// and friends
+// <-- addrmode2 -->
+//
+// LDC_OPTION: cop CRd Rn imm8
+// and friends
+// STC_OPTION: cop CRd Rn imm8
+// and friends
+//
+static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
+
+ unsigned &OpIdx = NumOpsAdded;
+ // A8.6.92
+ // if coproc == '101x' then SEE "Advanced SIMD and VFP"
+ // But since the special instructions have more explicit encoding bits
+ // specified, if coproc == 10 or 11, we should reject it as invalid.
+ unsigned coproc = GetCoprocessor(insn);
+ if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
+ Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
+ (coproc == 10 || coproc == 11)) {
+ DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
+ return false;
+ }
+
+ bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
+ Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+
+ // CDP/CDP2 have no GPR operand; the opc1 operand is also wider (Inst{23-20}).
+ bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
+ bool LdStCop = LdStCopOpcode(Opcode);
+ bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
+
+ OpIdx = 0;
+
+ if (RtOut) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+ MI.addOperand(MCOperand::CreateImm(coproc));
+ ++OpIdx;
+
+ if (LdStCop) {
+ // Unindexed form if P:W = 0b00 --> _OPTION variant
+ unsigned PW = getPBit(insn) << 1 | getWBit(insn);
+
+ MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx += 2;
+
+ if (PW) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
+ ARM_AM::no_shift, IndexMode);
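+ // The imm8 field is a word offset: e.g., imm8 = 3 encodes a byte offset of 12.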
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
+ ++OpIdx;
+ }
+ } else {
+ MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
+ : GetCopOpc1(insn, NoGPR)));
+ ++OpIdx;
+
+ if (!RtOut) {
+ MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
+ : MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn)))
+ : MCOperand::CreateImm(decodeRn(insn)));
+
+ MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
+
+ OpIdx += 2;
+
+ if (!OneCopOpc) {
+ MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
+
+// Branch Instructions.
+// BL: SignExtend(Imm24:'00', 32)
+// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
+// SMC: ZeroExtend(imm4, 32)
+// SVC: ZeroExtend(Imm24, 32)
+//
+// Various coprocessor instructions are assigned BrFrm arbitrarily.
+// Delegates to DisassembleCoprocessor() helper function.
+//
+// MRS/MRSsys: Rd
+// MSR/MSRsys: Rm mask=Inst{19-16}
+// BXJ: Rm
+// MSRi/MSRsysi: so_imm
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
+static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (CoprocessorOpcode(Opcode))
+ return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ // MRS and MRSsys take one GPR reg Rd.
+ if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rm.
+ if (Opcode == ARM::BXJ) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit
+ // in bit 4 and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSR) {
+ assert(NumOps >= 2 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+ // MSRi takes a mask, followed by one so_imm operand. The mask contains the
+ // R Bit in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSRi) {
+ // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+ // The hints instructions have more specific encodings, so if mask == 0,
+ // we should reject this as an invalid instruction.
+ if (slice(insn, 19, 16) == 0)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
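+ // For example, Rot = 2 and Imm = 0xFF decode to rotr32(0xFF, 4) = 0xF000000F.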
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
+ Opcode == ARM::RFEW || Opcode == ARM::RFE) {
+ ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
+
+ if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ else
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
+ || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
+ "Unexpected Opcode");
+
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+
+ int Imm32 = 0;
+ if (Opcode == ARM::SMC) {
+ // ZeroExtend(imm4, 32) where imm4 = Inst{3-0}.
+ Imm32 = slice(insn, 3, 0);
+ } else if (Opcode == ARM::SVC) {
+ // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}.
+ Imm32 = slice(insn, 23, 0);
+ } else {
+ // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2;
+ Imm32 = SignExtend32<26>(Imm26);
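+ // For example, imm24 = 0xFFFFFF gives Imm26 = 0x3FFFFFC, which sign-extends
+ // to -4.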
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// Misc. Branch Instructions.
+// BX_RET, MOVPCLR
+// BLX, BLX_pred, BX, BX_pred
+// BLXi
+static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
+ return true;
+
+ // BLX and BX take one GPR reg.
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
+ Opcode == ARM::BX || Opcode == ARM::BX_pred) {
+ assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ OpIdx = 1;
+ return true;
+ }
+
+ // BLXi takes imm32 (the PC offset).
+ if (Opcode == ARM::BLXi) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+ // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
+ int Imm32 = SignExtend32<26>(Imm26);
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ OpIdx = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = slice(insn, 11, 7);
+ uint32_t msb = slice(insn, 20, 16);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
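+ // For example, lsb = 4 and msb = 7 give Val = 0xF0 and mask = 0xFFFFFF0F.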
+ return true;
+}
+
+// Standard data-processing instructions allow PC as a register specifier,
+// but other DPFrm instructions that use PC as a register should be rejected.
+static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
+ switch (Opcode) {
+ default:
+ // Did we miss an opcode?
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
+ DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
+ return true;
+ }
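+ // Otherwise fall through and treat the register specifiers as acceptable.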
+ case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr:
+ case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr:
+ case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr:
+ case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr:
+ return false;
+ }
+}
+
+// A major complication is that some of the saturating add/subtract operations
+// have Rd Rm Rn instead of the "normal" Rd Rn Rm.
+// They are QADD, QDADD, QDSUB, and QSUB.
+static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Now disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // Special-case handling of BFC/BFI/SBFX/UBFX.
+ if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
+ // A8.6.17 BFC & A8.6.18 BFI
+ // Sanity check Rd.
+ if (decodeRd(insn) == 15)
+ return false;
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+ }
+ uint32_t mask = 0;
+ if (!getBFCInvMask(insn, mask))
+ return false;
+
+ MI.addOperand(MCOperand::CreateImm(mask));
+ OpIdx += 2;
+ return true;
+ }
+ if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
+ // Sanity check Rd and Rm.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
+ OpIdx += 3;
+ return true;
+ }
+
+ bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD ||
+ Opcode == ARM::QDSUB || Opcode == ARM::QSUB);
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn ? decodeRm(insn) : decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Now disassemble operand 2.
+ if (OpIdx >= NumOps)
+ return false;
+
+ if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ // We have a reg/reg form.
+ // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
+ // routed here as well.
+ // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
+ if (BadRegsDPFrm(Opcode, insn))
+ return false;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ RmRn? decodeRn(insn) : decodeRm(insn))));
+ ++OpIdx;
+ } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
+ // These two instructions don't allow d as 15.
+ if (decodeRd(insn) == 15)
+ return false;
+ // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
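+ // For example, imm4 = 0xA and imm12 = 0xBCD combine into Imm16 = 0xABCD.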
+ if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Imm16));
+ ++OpIdx;
+ } else {
+ // We have a reg/imm form.
+ // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
+ // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
+ // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
+ assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
+ unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
+ unsigned Imm = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Disassemble register def if there is one.
+ if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the src operands.
+ if (OpIdx >= NumOps)
+ return false;
+
+ // BinaryDP has an Rn operand.
+ if (!isUnary) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Disassemble operand 2, which consists of three components.
+ if (OpIdx + 2 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
+ "Expect 3 reg operands");
+
+ // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
+ unsigned Rs = slice(insn, 4, 4);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ if (Rs) {
+ // If Inst{7} != 0, we should reject this insn as an invalid encoding.
+ if (slice(insn, 7, 7))
+ return false;
+
+ // A8.6.3 ADC (register-shifted register)
+ // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+ //
+ // This also accounts for shift instructions (register) where, fortunately,
+ // Inst{19-16} = 0b0000.
+ // A8.6.89 LSL (register)
+ // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
+ decodeRm(insn) == 15 || decodeRs(insn) == 15)
+ return false;
+
+ // Register-controlled shifts: [Rm, Rs, shift].
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0)));
+ } else {
+ // Constant shifts: [Rm, reg0, shift_imm].
+ MI.addOperand(MCOperand::CreateReg(0)); // NoRegister
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm)));
+ }
+ OpIdx += 3;
+
+ return true;
+}
+
+static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack,
+ bool Imm) {
+ const StringRef Name = ARMInsts[Opcode].Name;
+ unsigned Rt = decodeRd(insn);
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned P = getPBit(insn);
+ unsigned W = getWBit(insn);
+
+ if (Store) {
+ // Only STR (immediate, register) allows PC as the source.
+ if (Name.startswith("STRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ if (!Imm && Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Only LDR (immediate, register) allows PC as the destination.
+ if (Name.startswith("LDRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Imm) {
+ // Immediate
+ if (Rn == 15) {
+ // The literal form must be in offset mode; it's an encoding error
+ // otherwise.
+ if (!(P == 1 && W == 0)) {
+ DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
+ return true;
+ }
+ // LDRB (literal) does not allow PC as the destination.
+ if (Opcode != ARM::LDRi12 && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Write back while Rn == Rt does not make sense.
+ if (WBack && (Rn == Rt)) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ } else {
+ // Register
+ if (Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
+ && "Index mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For the reg/reg form, the base reg is followed by +/- reg, shift op, and imm.
+ // For the immediate form, it is followed by +/- imm12.
+ // See also ARMAddressingModes.h (Addressing Mode #2).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
+ return false;
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (getIBit(insn) == 0) {
+ // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
+ // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
+ // been populated.
+ if (isPrePost) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ OpIdx += 1;
+ }
+
+ unsigned Imm12 = slice(insn, 11, 0);
+ if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
+ Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).
+ int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $am2offset => (ops GPR, i32imm).
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
+ IndexMode);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 1;
+ } else {
+ // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
+ if (slice(insn,4,4) == 1)
+ return false;
+
+ // Disassemble the offset reg (Rm), shift type, and immediate shift length.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
+ OpIdx += 2;
+ }
+
+ return true;
+}
+
+static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
+}
+
+static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+static bool HasDualReg(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
+ case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
+ return true;
+ }
+}
+
+static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
+ && "Invalid arguments");
+
+ // Operand 0 of a pre- and post-indexed store is the address base writeback.
+ if (isPrePost && isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the dst/src operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Fill in the second Rt operand for LDRD and STRD.
+ if (HasDualReg(Opcode)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn) + 1)));
+ ++OpIdx;
+ }
+
+ // After dst of a pre- and post-indexed load is the address base writeback.
+ if (isPrePost && !isStore) {
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ // Disassemble the base operand.
+ if (OpIdx >= NumOps)
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
+ && "Offset mode or tied_to operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ // For reg/reg form, base reg is followed by +/- reg.
+ // For immediate form, it is followed by +/- imm8.
+ // See also ARMAddressingModes.h (Addressing Mode #3).
+ if (OpIdx + 1 >= NumOps)
+ return false;
+
+ assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (getAM3IBit(insn) == 1) {
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Disassemble the 8-bit immediate offset.
+ unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
+ unsigned Imm4L = insn & 0xF;
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
+ IndexMode);
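+ // For example, Imm4H = 0x1 and Imm4L = 0x8 give an 8-bit offset of 0x18 (24).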
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the offset reg (Rm).
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 2;
+
+ return true;
+}
+
+static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
+}
+
+// The algorithm for disassembly of LdStMulFrm is different from others because
+// it explicitly populates the two predicate operands after the base register.
+// After that, we need to populate the reglist with each affected register
+// encoded as an MCOperand.
+static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
+ Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
+ Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ // Add the base register operand.
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ NumOpsAdded += 3;
+
+ // Fill the variadic part of reglist.
+ unsigned RegListBits = insn & ((1 << 16) - 1);
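+ // For example, RegListBits = 0x4003 selects R0, R1, and LR (bit 14).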
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+ }
+
+ return true;
+}
+
+// LDREX, LDREXB, LDREXH: Rd Rn
+// LDREXD: Rd Rd+1 Rn
+// STREX, STREXB, STREXH: Rd Rm Rn
+// STREXD: Rd Rm Rm+1 Rn
+//
+// SWP, SWPB: Rd Rm Rn
+static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool isStore = slice(insn, 20, 20) == 0;
+ bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ // Store register Exclusive needs a source operand.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn)+1)));
+ ++OpIdx;
+ }
+ } else if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn)+1)));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Misc. Arithmetic Instructions.
+// CLZ: Rd Rm
+// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5
+// RBIT, REV, REV16, REVSH: Rd Rm
+static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ // Sanity check the registers, which should not be 15.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+ if (ThreeReg && decodeRn(insn) == 15)
+ return false;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ assert(NumOps >= 4 && "Expect >= 4 operands");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional imm5 LSL/ASR operand.
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 5-bit immediate field Inst{11-7}.
+ unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHTB)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // A8.6.183 SSAT
+ // if d == 15 || n == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
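+ // For example, SSAT with Inst{20-16} = 0b01111 saturates to a 16-bit signed
+ // range.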
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// Extend instructions.
+// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
+// The 2nd operand register is Rn and the 3rd operand register is Rm for the
+// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
+static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // A8.6.220 SXTAB
+ // if d == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && "Expect 2 reg operands");
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ // If there is still an operand info left which is an immediate operand, add
+ // an additional rotate immediate operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Extract the 2-bit rotate field Inst{11-10}.
+ unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
+ // Rotation by 8, 16, or 24 bits.
+ MI.addOperand(MCOperand::CreateImm(rot << 3));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+/////////////////////////////////////
+// //
+// Utility Functions For VFP //
+// //
+/////////////////////////////////////
+
+// Extract/Decode Dd/Sd:
+//
+// SP => d = UInt(Vd:D)
+// DP => d = UInt(D:Vd)
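+// For example, in SP form, Vd = 0b0101 with D = 1 gives d = 0b01011 = 11 (S11).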
+static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
+ : (decodeRd(insn) | getDBit(insn) << 4);
+}
+
+// Extract/Decode Dn/Sn:
+//
+// SP => n = UInt(Vn:N)
+// DP => n = UInt(N:Vn)
+static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
+ : (decodeRn(insn) | getNBit(insn) << 4);
+}
+
+// Extract/Decode Dm/Sm:
+//
+// SP => m = UInt(Vm:M)
+// DP => m = UInt(M:Vm)
+static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
+ return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
+ : (decodeRm(insn) | getMBit(insn) << 4);
+}
+
+// A7.5.1
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
+ assert(N == 32 || N == 64);
+
+ uint64_t Result;
+ unsigned bit6 = slice(byte, 6, 6);
+ if (N == 32) {
+ Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
+ if (bit6)
+ Result |= 0x1f << 25;
+ else
+ Result |= 0x1 << 30;
+ } else {
+ Result = (uint64_t)slice(byte, 7, 7) << 63 |
+ (uint64_t)slice(byte, 5, 0) << 48;
+ if (bit6)
+ Result |= 0xffULL << 54;
+ else
+ Result |= 0x1ULL << 62;
+ }
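+ // For example, byte = 0x70 with N = 32 expands to 0x3F800000, i.e., 1.0f.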
+ return APInt(N, Result);
+}
+
+// VFP Unary Format Instructions:
+//
+// VCMP[E]ZD, VCMP[E]ZS: compare one floating-point register with zero
+// VCVTDS, VCVTSD: convert between double-precision and single-precision
+// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
+static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Early return for compare with zero instructions.
+ if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS
+ || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS)
+ return true;
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs.
+// Some of them have operand constraints which tie the first operand in the
+// InOperandList to that of the dst. As far as asm printing is concerned, this
+// tied_to operand is simply skipped.
+static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
+ "Reg operand expected");
+ bool isSP = (RegClass == ARM::SPRRegClassID);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
+ ++OpIdx;
+
+ // Skip tied_to operand constraint.
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ assert(NumOps >= 4 && "Expect >=4 operands");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
+ ++OpIdx;
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.295 vcvt (floating-point <-> integer)
+// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
+// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
+//
+// A8.6.297 vcvt (floating-point and fixed-point)
+// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
+static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
+ bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
+ unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ if (fixed_point) {
+ // A8.6.297
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int size = slice(insn, 7, 7) == 0 ? 16 : 32;
+ int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
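+ // For example, size = 32 with imm4:i = 0b10000 gives fbits = 32 - 16 = 16.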
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClassID,
+ decodeVFPRd(insn, SP))));
+
+ assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MI.getOperand(0));
+
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef() && "Imm operand expected");
+ MI.addOperand(MCOperand::CreateImm(fbits));
+
+ NumOpsAdded = 3;
+ } else {
+ // A8.6.295
+ // The Rd (destination) and Rm (source) bits have different interpretations
+ // depending on whether the operand is single precision.
+ unsigned d, m;
+ if (slice(insn, 18, 18) == 1) { // to_integer operation
+ d = decodeVFPRd(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, d)));
+ m = decodeVFPRm(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
+ } else {
+ d = decodeVFPRd(insn, SP);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
+ m = decodeVFPRm(insn, true /* Is Single Precision */);
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::SPRRegClassID, m)));
+ }
+ NumOpsAdded = 2;
+ }
+
+ return true;
+}
+
+// VMOVRS - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVRRD - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVRRS - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx = 2;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+ return true;
+}
+
+// VMOVSR - A8.6.330
+// Rt => Rd; Sn => UInt(Vn:N)
+static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ decodeVFPRn(insn, true))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// VMOVDRR - A8.6.332
+// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
+//
+// VMOVSRR - A8.6.331
+// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
+static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
+ unsigned Sm = decodeVFPRm(insn, true);
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm)));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
+ Sm+1)));
+ OpIdx += 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeVFPRm(insn, false))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ OpIdx += 2;
+ return true;
+}
+
+// VFP Load/Store Instructions.
+// VLDRD, VLDRS, VSTRD, VSTRS
+static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
+
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd for operand 0.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Next comes the AM5 Opcode.
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned char Imm8 = insn & 0xFF;
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8)));
+
+ NumOpsAdded = 3;
+
+ return true;
+}
+
+// VFP Load/Store Multiple Instructions.
+// We have an optional write back reg, the base, and two predicate operands.
+// It is then followed by a reglist of either DPR(s) or SPR(s).
+//
+// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
+static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base, if necessary.
+ if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
+ Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
+ Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+
+ // Handling the two predicate operands before the reglist.
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+
+ OpIdx += 3;
+
+ bool isSPVFP = (Opcode == ARM::VLDMSIA ||
+ Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMSIA ||
+ Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
+ unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+ // Extract Dd/Sd.
+ unsigned RegD = decodeVFPRd(insn, isSPVFP);
+
+ // Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
+ unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
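+ // For example, Imm8 = 8 encodes a list of 8 SPRs or 4 DPRs.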
+
+ // Apply some sanity checks before proceeding.
+ if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
+ return false;
+
+ for (unsigned i = 0; i < Regs; ++i) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
+ RegD + i)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// Misc. VFP Instructions.
+// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand)
+// FCONSTD (DPR and a VFPf64Imm operand)
+// FCONSTS (SPR and a VFPf32Imm operand)
+// VMRS/VMSR (GPR operand)
+static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ if (Opcode == ARM::FMSTAT)
+ return true;
+
+ assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands");
+
+ unsigned RegEnum = 0;
+ switch (OpInfo[0].RegClass) {
+ case ARM::DPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
+ break;
+ case ARM::SPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
+ break;
+ case ARM::GPRRegClassID:
+ RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
+ break;
+ default:
+ assert(0 && "Invalid reg class id");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(RegEnum));
+ ++OpIdx;
+
+ // Extract/decode the f64/f32 immediate.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // The asm syntax specifies the floating point value, not the 8-bit literal.
+ APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+ Opcode == ARM::FCONSTD ? 64 : 32);
+ APFloat immFP = APFloat(immRaw, true);
+ double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+ immFP.convertToFloat();
+ MI.addOperand(MCOperand::CreateFPImm(imm));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file.
+#include "ThumbDisassemblerCore.h"
+
+/////////////////////////////////////////////////////
+// //
+// Utility Functions For ARM Advanced SIMD //
+// //
+/////////////////////////////////////////////////////
+
+// The following NEON naming conventions are based on A8.6.266 VABA, VABAL.
+// Note that A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of
+// VABA/VABAL.
+
+// A7.3 Register encoding
+
+// Extract/Decode NEON D/Vd:
+//
+// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for
+// doubleword, Dd = UInt(D:Vd). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// D = Inst{22}, Vd = Inst{15-12}
+static unsigned decodeNEONRd(uint32_t insn) {
+ return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
+}
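+// As an illustration (arbitrary field values): D = 1 and Vd = 0b0101 give
+// D:Vd = 0b10101 = 21, i.e. D21 for a doubleword operand; per the note above,
+// getRegisterEnum() compensates for quadword operands, yielding Q10.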
+
+// Extract/Decode NEON N/Vn:
+//
+// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for
+// doubleword, Dn = UInt(N:Vn). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// N = Inst{7}, Vn = Inst{19-16}
+static unsigned decodeNEONRn(uint32_t insn) {
+ return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
+}
+
+// Extract/Decode NEON M/Vm:
+//
+// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for
+// doubleword, Dm = UInt(M:Vm). We compensate for this difference by
+// handling it in the getRegisterEnum() utility function.
+// M = Inst{5}, Vm = Inst{3-0}
+static unsigned decodeNEONRm(uint32_t insn) {
+ return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
+ | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
+}
+
+namespace {
+enum ElemSize {
+ ESizeNA = 0,
+ ESize8 = 8,
+ ESize16 = 16,
+ ESize32 = 32,
+ ESize64 = 64
+};
+} // End of unnamed namespace
+
+// size field -> Inst{11-10}
+// index_align field -> Inst{7-4}
+//
+// The Lane Index interpretation depends on the Data Size:
+// 8 (encoded as size = 0b00) -> Index = index_align[3:1]
+// 16 (encoded as size = 0b01) -> Index = index_align[3:2]
+// 32 (encoded as size = 0b10) -> Index = index_align[3]
+//
+// Ref: A8.6.317 VLD4 (single 4-element structure to one lane).
+static unsigned decodeLaneIndex(uint32_t insn) {
+ unsigned size = insn >> 10 & 3;
+ assert((size == 0 || size == 1 || size == 2) &&
+ "Encoding error: size should be either 0, 1, or 2");
+
+ unsigned index_align = insn >> 4 & 0xF;
+ return (index_align >> 1) >> size;
+}
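+// As an illustration (arbitrary encoding): size = 0b01 (16-bit elements) and
+// index_align = 0b0110 give Index = index_align[3:2] = 1, matching
+// (index_align >> 1) >> size = 0b011 >> 1 = 1.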
+
+// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4)
+// op = Inst{5}, cmode = Inst{11-8}
+// i = Inst{24} (ARM architecture)
+// imm3 = Inst{18-16}, imm4 = Inst{3-0}
+// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
+static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
+ unsigned char cmode = (insn >> 8) & 0xF;
+ unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
+ ((insn >> 16) & 7) << 4 |
+ (insn & 0xF);
+ return (op << 12) | (cmode << 8) | Imm8;
+}
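+// As an illustration (arbitrary field values): op = 1, cmode = 0b1110 and an
+// i:imm3:imm4 of 0x2A pack into (1 << 12) | (0xE << 8) | 0x2A = 0x1E2A,
+// i.e. the concatenation op:cmode:imm8.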
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7
+// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15
+static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return insn & 7;
+ case ESize32:
+ return insn & 0xF;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
+
+// A8.6.339 VMUL, VMULL (by scalar)
+// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7
+// ESize32 => index = Inst{5} (M) D0-D15
+static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize16:
+ return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1);
+ case ESize32:
+ return (insn >> 5) & 1;
+ default:
+ assert(0 && "Unreachable code!");
+ return 0;
+ }
+}
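+// As an illustration (arbitrary field values): with ESize16, M = 1 and
+// Vm = 0b0101, decodeRestrictedDm() returns Vm<2:0> = 0b101 = 5 (D5) and
+// decodeRestrictedDmIndex() returns M:Vm<3> = 0b10 = 2.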
+
+// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD)
+// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}.
+static unsigned decodeVCVTFractionBits(uint32_t insn) {
+ return 64 - ((insn >> 16) & 0x3F);
+}
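+// E.g., an imm6 field of 0b110000 (48) encodes <fbits> = 64 - 48 = 16.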
+
+// A8.6.302 VDUP (scalar)
+// ESize8 => index = Inst{19-17}
+// ESize16 => index = Inst{19-18}
+// ESize32 => index = Inst{19}
+static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return (insn >> 17) & 7;
+ case ESize16:
+ return (insn >> 18) & 3;
+ case ESize32:
+ return (insn >> 19) & 1;
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
+
+// A8.6.328 VMOV (ARM core register to scalar)
+// A8.6.329 VMOV (scalar to ARM core register)
+// ESize8 => index = Inst{21:6-5}
+// ESize16 => index = Inst{21:6}
+// ESize32 => index = Inst{21}
+static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) {
+ switch (esize) {
+ case ESize8:
+ return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
+ case ESize16:
+ return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1);
+ case ESize32:
+ return ((insn >> 21) & 1);
+ default:
+ assert(0 && "Unspecified element size!");
+ return 0;
+ }
+}
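+// As an illustration (arbitrary field values): with ESize8, Inst{21} = 1 and
+// Inst{6-5} = 0b10, the lane index is (1 << 2) | 0b10 = 6.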
+
+// Imm6 = Inst{21-16}, L = Inst{7}
+//
+// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = imm6 - 8
+// '001xxxx' => esize = 16; shift_amount = imm6 - 16
+// '01xxxxx' => esize = 32; shift_amount = imm6 - 32
+// '1xxxxxx' => esize = 64; shift_amount = imm6
+//
+// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN):
+// case L:imm6 of
+// '0001xxx' => esize = 8; shift_amount = 16 - imm6
+// '001xxxx' => esize = 16; shift_amount = 32 - imm6
+// '01xxxxx' => esize = 32; shift_amount = 64 - imm6
+// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6
+//
+static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) {
+ ElemSize esize = ESizeNA;
+ unsigned L = (insn >> 7) & 1;
+ unsigned imm6 = (insn >> 16) & 0x3F;
+ if (L == 0) {
+ if (imm6 >> 3 == 1)
+ esize = ESize8;
+ else if (imm6 >> 4 == 1)
+ esize = ESize16;
+ else if (imm6 >> 5 == 1)
+ esize = ESize32;
+ else
+ assert(0 && "Wrong encoding of Inst{7:21-16}!");
+ } else
+ esize = ESize64;
+
+ if (LeftShift)
+ return esize == ESize64 ? imm6 : (imm6 - esize);
+ else
+ return esize == ESize64 ? (esize - imm6) : (2*esize - imm6);
+}
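+// As an illustration (arbitrary encoding): L = 0 and imm6 = 0b001010 (10)
+// fall in the '0001xxx' row, so esize = 8; a left shift then decodes to
+// imm6 - 8 = 2, while a right shift decodes to 2*esize - imm6 = 6.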
+
+// A8.6.305 VEXT
+// Imm4 = Inst{11-8}
+static unsigned decodeN3VImm(uint32_t insn) {
+ return (insn >> 8) & 0xF;
+}
+
+// VLD*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
+// VLD*LN*
+// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
+// VST*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
+// VST*LN*
+// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
+//
+// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
+static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
+ unsigned alignment, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ // At least one DPR register plus addressing mode #6.
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // We have homogeneous NEON registers for Load/Store.
+ unsigned RegClass = 0;
+
+ // Double-spaced registers have increments of 2.
+ unsigned Inc = DblSpaced ? 2 : 1;
+
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned Rd = decodeNEONRd(insn);
+
+ // A7.7.1 Advanced SIMD addressing mode.
+ bool WB = Rm != 15;
+
+ // LLVM Addressing Mode #6.
+ unsigned RmEnum = 0;
+ if (WB && Rm != 13)
+ RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
+
+ if (Store) {
+ // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
+ // then possible lane index.
+ assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Reg operand expected");
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ assert(OpIdx < NumOps &&
+ (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ RegClass = OpInfo[OpIdx].RegClass;
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+
+ } else {
+ // Consume the DPR/QPR's, possible WB, AddrMode6, possible increment reg,
+ // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
+ RegClass = OpInfo[0].RegClass;
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass, Rd)));
+ Rd += Inc;
+ ++OpIdx;
+ }
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ ++OpIdx;
+ }
+
+ assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ Rn)));
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
+ OpIdx += 2;
+
+ if (WB) {
+ MI.addOperand(MCOperand::CreateReg(RmEnum));
+ ++OpIdx;
+ }
+
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
+ assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
+ "Tied to operand expected");
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Handle possible lane index.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+ ++OpIdx;
+ }
+ }
+
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
+ return true;
+}
+
+// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
+static bool Align4OneLaneInst(unsigned elem, unsigned size,
+ unsigned index_align, unsigned & alignment) {
+ unsigned bits = 0;
+ switch (elem) {
+ default:
+ return false;
+ case 1:
+ // A8.6.308
+ if (size == 0)
+ return slice(index_align, 0, 0) == 0;
+ else if (size == 1) {
+ bits = slice(index_align, 1, 0);
+ if (bits != 0 && bits != 1)
+ return false;
+ if (bits == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 2, 0);
+ if (bits != 0 && bits != 3)
+ return false;
+ if (bits == 3)
+ alignment = 32;
+ return true;
+ }
+ return true;
+ case 2:
+ // A8.6.311
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 1) != 0)
+ return false;
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ }
+ return true;
+ case 3:
+ // A8.6.314
+ if (size == 0) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 0) != 0)
+ return false;
+ return true;
+ }
+ return true;
+ case 4:
+ // A8.6.317
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 1, 0);
+ if (bits == 3)
+ return false;
+ if (bits == 1)
+ alignment = 64;
+ else if (bits == 2)
+ alignment = 128;
+ return true;
+ }
+ return true;
+ }
+}
+
+// A7.7
+// If L (Inst{21}) == 0, store instructions.
+// Find out about double-spaced-ness of the Opcode and pass it on to
+// DisassembleNLdSt0().
+static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const StringRef Name = ARMInsts[Opcode].Name;
+ bool DblSpaced = false;
+ // 0 represents standard alignment, i.e., unaligned data access.
+ unsigned alignment = 0;
+
+ unsigned elem = 0; // legal values: {1, 2, 3, 4}
+ if (Name.startswith("VST1") || Name.startswith("VLD1"))
+ elem = 1;
+
+ if (Name.startswith("VST2") || Name.startswith("VLD2"))
+ elem = 2;
+
+ if (Name.startswith("VST3") || Name.startswith("VLD3"))
+ elem = 3;
+
+ if (Name.startswith("VST4") || Name.startswith("VLD4"))
+ elem = 4;
+
+ if (Name.find("LN") != std::string::npos) {
+ // "To one lane" instructions.
+ // See, for example, A8.6.317 VLD4 (single 4-element structure to one lane).
+
+ // Utility function takes number of elements, size, and index_align.
+ if (!Align4OneLaneInst(elem,
+ slice(insn, 11, 10),
+ slice(insn, 7, 4),
+ alignment))
+ return false;
+
+ // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
+ if (Name.endswith("16") || Name.endswith("16_UPD"))
+ DblSpaced = slice(insn, 5, 5) == 1;
+
+ // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
+ if (Name.endswith("32") || Name.endswith("32_UPD"))
+ DblSpaced = slice(insn, 6, 6) == 1;
+ } else if (Name.find("DUP") != std::string::npos) {
+ // Single element (or structure) to all lanes.
+ // Inst{9-8} encodes the number of element(s) in the structure, with:
+ // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data sizes 16 and 32)
+ // 0b01 (VLD2DUP)
+ // 0b10 (VLD3DUP) (for this, the a bit must be encoded as 0)
+ // 0b11 (VLD4DUP)
+ //
+ // Inst{7-6} encodes the data size, with:
+ // 0b00 => 8, 0b01 => 16, 0b10 => 32
+ //
+ // Inst{4} (the a bit) encodes the align action (0: standard alignment)
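+ //
+ // As an illustration (hypothetical encoding): a VLD2DUP with
+ // Inst{7-6} = 0b01 (16-bit data) and a = 1 yields
+ // alignment = 2 * 16 = 32 bits.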
+ unsigned elem = slice(insn, 9, 8) + 1;
+ unsigned a = slice(insn, 4, 4);
+ if (elem != 3) {
+ // 0b11 is not a valid encoding for Inst{7-6}.
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ unsigned data_size = 8 << slice(insn, 7, 6);
+ // For VLD1DUP, the a bit makes sense only for data sizes of 16 and 32.
+ if (a && data_size == 8)
+ return false;
+
+ // Now we can calculate the alignment!
+ if (a)
+ alignment = elem * data_size;
+ } else {
+ if (a) {
+ // A8.6.315 VLD3 (single 3-element structure to all lanes)
+ // The a bit must be encoded as 0.
+ return false;
+ }
+ }
+ } else {
+ // Multiple n-element structures with type encoded as Inst{11-8}.
+ // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
+
+ // Inst{5-4} encodes alignment.
+ unsigned align = slice(insn, 5, 4);
+ switch (align) {
+ default:
+ break;
+ case 1:
+ alignment = 64; break;
+ case 2:
+ alignment = 128; break;
+ case 3:
+ alignment = 256; break;
+ }
+
+ unsigned type = slice(insn, 11, 8);
+ // Reject UNDEFINED instructions based on type and align.
+ // Plus set DblSpaced flag where appropriate.
+ switch (elem) {
+ default:
+ break;
+ case 1:
+ // n == 1
+ // A8.6.307 & A8.6.391
+ if ((type == 7 && slice(align, 1, 1) == 1) ||
+ (type == 10 && align == 3) ||
+ (type == 6 && slice(align, 1, 1) == 1))
+ return false;
+ break;
+ case 2:
+ // n == 2 && type == 0b1001 -> DblSpaced = true
+ // A8.6.310 & A8.6.393
+ if ((type == 8 || type == 9) && align == 3)
+ return false;
+ DblSpaced = (type == 9);
+ break;
+ case 3:
+ // n == 3 && type == 0b0101 -> DblSpaced = true
+ // A8.6.313 & A8.6.395
+ if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
+ return false;
+ DblSpaced = (type == 5);
+ break;
+ case 4:
+ // n == 4 && type == 0b0001 -> DblSpaced = true
+ // A8.6.316 & A8.6.397
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ DblSpaced = (type == 1);
+ break;
+ }
+ }
+ return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
+ slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
+}
+
+// VMOV (immediate)
+// Qd/Dd imm
+// VBIC (immediate)
+// VORR (immediate)
+// Qd/Dd imm src(=Qd/Dd)
+static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass < 0) &&
+ "Expect 1 reg operand followed by 1 imm operand");
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+
+ ElemSize esize = ESizeNA;
+ switch (Opcode) {
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ esize = ESize8;
+ break;
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv8i16:
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv8i16:
+ esize = ESize16;
+ break;
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
+ case ARM::VBICiv2i32:
+ case ARM::VBICiv4i32:
+ case ARM::VORRiv2i32:
+ case ARM::VORRiv4i32:
+ esize = ESize32;
+ break;
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2i64:
+ esize = ESize64;
+ break;
+ default:
+ assert(0 && "Unexpected opcode!");
+ return false;
+ }
+
+ // One register and a modified immediate value.
+ // Add the imm operand.
+ MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
+
+ NumOpsAdded = 2;
+
+ // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
+ if (NumOps >= 3 &&
+ (OpInfo[2].RegClass == ARM::DPRRegClassID ||
+ OpInfo[2].RegClass == ARM::QPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+ NumOpsAdded += 1;
+ }
+
+ return true;
+}
+
+namespace {
+enum N2VFlag {
+ N2V_None,
+ N2V_VectorDupLane,
+ N2V_VectorConvert_Between_Float_Fixed
+};
+} // End of unnamed namespace
+
+// Vector Convert [between floating-point and fixed-point]
+// Qd/Dd Qm/Dm [fbits]
+//
+// Vector Duplicate Lane (from scalar to all elements) Instructions.
+// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
+// Qd/Dd Dm index
+//
+// Vector Move Long:
+// Qd Dm
+//
+// Vector Move Narrow:
+// Dd Qm
+//
+// Others
+static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opc];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 2 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ ElemSize esize = ESizeNA;
+ if (Flag == N2V_VectorDupLane) {
+ // VDUPLN has its index embedded. Its size can be inferred from the Opcode.
+ assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
+ "Unexpected Opcode");
+ esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
+ : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16
+ : ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VPADAL...
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // VZIP and others have two TIED_TO reg operands.
+ int Idx;
+ while (OpIdx < NumOps &&
+ (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Add TIED_TO operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Add the imm operand, if required.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+
+ unsigned imm = 0xFFFFFFFF;
+
+ if (Flag == N2V_VectorDupLane)
+ imm = decodeNVLaneDupIndex(insn, esize);
+ if (Flag == N2V_VectorConvert_Between_Float_Fixed)
+ imm = decodeVCVTFractionBits(insn);
+
+ assert(imm != 0xFFFFFFFF && "Internal error");
+ MI.addOperand(MCOperand::CreateImm(imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_None, B);
+}
+static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorConvert_Between_Float_Fixed, B);
+}
+static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
+ N2V_VectorDupLane, B);
+}
+
+// Vector Shift [Accumulate] Instructions.
+// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt
+//
+// Vector Shift Left Long (with maximum shift count) Instructions.
+// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
+//
+static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+ OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+ "Reg operand expected");
+
+ // Qm/Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
+
+ // Add the imm operand.
+
+ // VSHLL has maximum shift count as the imm, inferred from its size.
+ unsigned Imm;
+ switch (Opcode) {
+ default:
+ Imm = decodeNVSAmt(insn, LeftShift);
+ break;
+ case ARM::VSHLLi8:
+ Imm = 8;
+ break;
+ case ARM::VSHLLi16:
+ Imm = 16;
+ break;
+ case ARM::VSHLLi32:
+ Imm = 32;
+ break;
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
+
+// Left shift instructions.
+static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
+ B);
+}
+// Right shift instructions have different shift amount interpretation.
+static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
+ B);
+}
+
+namespace {
+enum N3VFlag {
+ N3V_None,
+ N3V_VectorExtract,
+ N3V_VectorShift,
+ N3V_Multiply_By_Scalar
+};
+} // End of unnamed namespace
+
+// NEON Three Register Instructions with Optional Immediate Operand
+//
+// Vector Extract Instructions.
+// Qd/Dd Qn/Dn Qm/Dm imm4
+//
+// Vector Shift (Register) Instructions.
+// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n)
+//
+// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions.
+// Qd/Dd Qn/Dn RestrictedDm index
+//
+// Others
+static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+
+ // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
+ assert(NumOps >= 3 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+ OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+ "Expect >= 3 operands and first 2 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ bool VdVnVm = (Flag != N3V_VectorShift);
+ bool IsImm4 = (Flag == N3V_VectorExtract);
+ bool IsDmRestricted = (Flag == N3V_Multiply_By_Scalar);
+ ElemSize esize = ESizeNA;
+ if (Flag == N3V_Multiply_By_Scalar) {
+ unsigned size = (insn >> 20) & 3;
+ if (size == 1) esize = ESize16;
+ if (size == 2) esize = ESize32;
+ assert (esize == ESize16 || esize == ESize32);
+ }
+
+ // Qd/Dd = Inst{22:15-12} => NEON Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // VABA, VABAL, VBSLd, VBSLq, ...
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+ ++OpIdx;
+ }
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ // or
+ // Dm = Inst{5:3-0} => NEON Rm
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ VdVnVm ? decodeNEONRn(insn)
+ : decodeNEONRm(insn))));
+ ++OpIdx;
+
+ // Dm = Inst{5:3-0} => NEON Rm
+ // or
+ // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
+ // or
+ // Dn = Inst{7:19-16} => NEON Rn
+ unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize)
+ : decodeNEONRm(insn))
+ : decodeNEONRn(insn);
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the imm operand.
+ unsigned Imm = 0;
+ if (IsImm4)
+ Imm = decodeN3VImm(insn);
+ else if (IsDmRestricted)
+ Imm = decodeRestrictedDmIndex(insn, esize);
+ else {
+ assert(0 && "Internal error: unreachable code!");
+ return false;
+ }
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_None, B);
+}
+static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorShift, B);
+}
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_VectorExtract, B);
+}
+static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ N3V_Multiply_By_Scalar, B);
+}
+
+// Vector Table Lookup
+//
+// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm
+// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm
+// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
+// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
+static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass == ARM::DPRRegClassID &&
+ "Expect >= 3 operands and first 3 as reg operands");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned Rn = decodeNEONRn(insn);
+
+ // {Dn} encoded as len = 0b00
+ // {Dn Dn+1} encoded as len = 0b01
+ // {Dn Dn+1 Dn+2 } encoded as len = 0b10
+ // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11
+ unsigned Len = slice(insn, 9, 8) + 1;
+
+ // Dd (the destination vector)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRd(insn))));
+ ++OpIdx;
+
+ // Process tied_to operand constraint.
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Do the <list> now.
+ for (unsigned i = 0; i < Len; ++i) {
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ Rn + i)));
+ ++OpIdx;
+ }
+
+ // Dm (the index vector)
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+ "Reg operand (index vector) expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRm(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// Vector Get Lane (move scalar to ARM core register) Instructions.
+// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ OpInfo[2].RegClass < 0 &&
+ "Expect >= 3 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VGETLNi32 ? ESize32
+ : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
+ : ESize8);
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ // Dn = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Vector Set Lane (move ARM core register to scalar) Instructions.
+// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ if (!OpInfo) return false;
+
+ assert(MCID.getNumDefs() == 1 && NumOps >= 4 &&
+ OpInfo[0].RegClass == ARM::DPRRegClassID &&
+ OpInfo[1].RegClass == ARM::DPRRegClassID &&
+ MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
+ OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[3].RegClass < 0 &&
+ "Expect >= 4 operands with one dst operand");
+
+ ElemSize esize =
+ Opcode == ARM::VSETLNi8 ? ESize8
+ : (Opcode == ARM::VSETLNi16 ? ESize16
+ : ESize32);
+
+ // Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
+ decodeNEONRn(insn))));
+
+ // TIED_TO operand.
+ MI.addOperand(MCOperand::CreateReg(0));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+ NumOpsAdded = 4;
+ return true;
+}
+
+// Vector Duplicate Instructions (from ARM core register to all elements).
+// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 2 &&
+ (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+ OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first 2 as reg operand");
+
+ unsigned RegClass = OpInfo[0].RegClass;
+
+ // Qd/Dd = Inst{7:19-16} => NEON Rn
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
+ decodeNEONRn(insn))));
+
+ // Rt = Inst{15-12} => ARM Rd
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = 2;
+ return true;
+}
+
+static inline bool PreLoadOpcode(unsigned Opcode) {
+ switch(Opcode) {
+ case ARM::PLDi12: case ARM::PLDrs:
+ case ARM::PLDWi12: case ARM::PLDWrs:
+ case ARM::PLIi12: case ARM::PLIrs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // PLDi12, PLDWi12, PLIi12: addrmode_imm12
+ // PLDrs, PLDWrs, PLIrs: ldst_so_reg
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+
+ if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
+ || Opcode == ARM::PLIi12) {
+ unsigned Imm12 = slice(insn, 11, 0);
+ bool Negative = getUBit(insn) == 0;
+
+ // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12.
+ if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
+ DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
+ MI.setOpcode(ARM::PLDi12);
+ }
+
+ // -0 is represented specially. All other values are as normal.
+ int Offset = Negative ? -1 * Imm12 : Imm12;
+ if (Imm12 == 0 && Negative)
+ Offset = INT32_MIN;
+
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ NumOpsAdded = 2;
+ } else {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+
+ // Inst{6-5} encodes the shift opcode.
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShImm = slice(insn, 11, 7);
+
+ // A8.4.1. Possible rrx or shift amount of 32...
+ getImmShiftSE(ShOp, ShImm);
+ MI.addOperand(MCOperand::CreateImm(
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+ NumOpsAdded = 3;
+ }
+
+ return true;
+}
+
+static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ switch (Opcode) {
+ case ARM::CLREX:
+ case ARM::NOP:
+ case ARM::TRAP:
+ case ARM::YIELD:
+ case ARM::WFE:
+ case ARM::WFI:
+ case ARM::SEV:
+ return true;
+ case ARM::SWP:
+ case ARM::SWPB:
+ // SWP, SWPB: Rd Rm Rn
+ // Delegate to DisassembleLdStExFrm().
+ return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ default:
+ break;
+ }
+
+ if (Opcode == ARM::SETEND) {
+ NumOpsAdded = 1;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+ return true;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::CPS3p) { // M = 1
+ // Let's reject these impossible imod values by returning false:
+ // 1. (imod=0b01)
+ //
+ // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an
+ // invalid combination, so we reject imod=0b00 here as well.
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
+ // Let's reject these impossible imod values by returning false:
+ // 1. (imod=0b00,M=0)
+ // 2. (imod=0b01)
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}).
+ if (Opcode == ARM::BKPT) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 |
+ slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ if (PreLoadOpcode(Opcode))
+ return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert(0 && "Unexpected misc instruction!");
+ return false;
+}
+
+/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
+/// We divide the disassembly task into different categories, with each one
+/// corresponding to a specific instruction encoding format. There could be
+/// exceptions when handling a specific format, and that is why the Opcode is
+/// also present in the function prototype.
+static const DisassembleFP FuncPtrs[] = {
+ &DisassemblePseudo,
+ &DisassembleMulFrm,
+ &DisassembleBrFrm,
+ &DisassembleBrMiscFrm,
+ &DisassembleDPFrm,
+ &DisassembleDPSoRegFrm,
+ &DisassembleLdFrm,
+ &DisassembleStFrm,
+ &DisassembleLdMiscFrm,
+ &DisassembleStMiscFrm,
+ &DisassembleLdStMulFrm,
+ &DisassembleLdStExFrm,
+ &DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
+ &DisassembleExtFrm,
+ &DisassembleVFPUnaryFrm,
+ &DisassembleVFPBinaryFrm,
+ &DisassembleVFPConv1Frm,
+ &DisassembleVFPConv2Frm,
+ &DisassembleVFPConv3Frm,
+ &DisassembleVFPConv4Frm,
+ &DisassembleVFPConv5Frm,
+ &DisassembleVFPLdStFrm,
+ &DisassembleVFPLdStMulFrm,
+ &DisassembleVFPMiscFrm,
+ &DisassembleThumbFrm,
+ &DisassembleMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
+
+ // VLD and VST (including one lane) Instructions.
+ &DisassembleNLdSt,
+
+ // A7.4.6 One register and a modified immediate value
+ // 1-Register Instructions with imm.
+ // LLVM only defines VMOVv instructions.
+ &DisassembleN1RegModImmFrm,
+
+ // 2-Register Instructions with no imm.
+ &DisassembleN2RegFrm,
+
+ // 2-Register Instructions with imm (vector convert float/fixed point).
+ &DisassembleNVCVTFrm,
+
+ // 2-Register Instructions with imm (vector dup lane).
+ &DisassembleNVecDupLnFrm,
+
+ // Vector Shift Left Instructions.
+ &DisassembleN2RegVecShLFrm,
+
+ // Vector Shift Right Instructions, which have a different interpretation of
+ // the shift amount encoded in the imm6 field.
+ &DisassembleN2RegVecShRFrm,
+
+ // 3-Register Data-Processing Instructions.
+ &DisassembleN3RegFrm,
+
+ // Vector Shift (Register) Instructions.
+ // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
+ &DisassembleN3RegVecShFrm,
+
+ // Vector Extract Instructions.
+ &DisassembleNVecExtractFrm,
+
+ // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
+ // By Scalar Instructions.
+ &DisassembleNVecMulScalarFrm,
+
+ // Vector Table Lookup uses byte indexes in a control vector to look up byte
+ // values in a table and generate a new vector.
+ &DisassembleNVTBLFrm,
+
+ NULL
+};
+
+/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+/// The general idea is to set the Opcode for the MCInst, followed by adding
+/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+/// to the Format-specific disassemble function for disassembly, followed by
+/// TryPredicateAndSBitModifier(), which adds the PredicateOperand and
+/// OptionalDefOperand that follow the Dst/Src operands.
+bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
+ // Stage 1 sets the Opcode.
+ MI.setOpcode(Opcode);
+ // If the number of operands is zero, we're done!
+ if (NumOps == 0)
+ return true;
+
+ // Stage 2 calls the format-specific disassemble function to build the operand
+ // list.
+ if (Disasm == NULL)
+ return false;
+ unsigned NumOpsAdded = 0;
+ bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
+
+ if (!OK || this->Err != 0) return false;
+ if (NumOpsAdded >= NumOps)
+ return true;
+
+ // Stage 3 deals with operands unaccounted for after stage 2 is finished.
+ // FIXME: Should this be done selectively?
+ return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
+}
+
+// A8.3 Conditional execution
+// A8.3.1 Pseudocode details of conditional execution
+// Condition bits '111x' indicate the instruction is always executed.
+static uint32_t CondCode(uint32_t CondField) {
+ if (CondField == 0xF)
+ return ARMCC::AL;
+ return CondField;
+}
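+// E.g., CondCode(0xF) maps the 0b1111 field to ARMCC::AL, the same value an
+// explicit 0b1110 (AL) condition field would produce.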
+
+/// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+/// of some Thumb instructions which come before the reglist operands. It
+/// returns true if the two predicate operands have been processed.
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t /* insn */, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned Idx = MI.getNumOperands();
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of MCOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return true;
+ }
+
+ return false;
+}
+
+/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+/// the possible Predicate and SBitModifier, to build the remaining MCOperand
+/// constituents.
+bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaining) {
+
+ assert(NumOpsRemaining > 0 && "Invalid argument");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const std::string &Name = ARMInsts[Opcode].Name;
+ unsigned Idx = MI.getNumOperands();
+ uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
+
+ // First, we check whether this instr specifies the PredicateOperand through
+ // a pair of MCOperandInfos with isPredicate() property.
+ if (NumOpsRemaining >= 2 &&
+ OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ {
+ // If we are inside an IT block, get the IT condition bits maintained via
+ // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+ // See also A2.5.2.
+ if (InITBlock())
+ MI.addOperand(MCOperand::CreateImm(GetITCond()));
+ else {
+ if (Name.length() > 1 && Name[0] == 't') {
+ // Thumb conditional branch instructions have their cond field embedded,
+ // like ARM.
+ //
+ // A8.6.16 B
+ // Check for undefined encodings.
+ unsigned cond;
+ if (Name == "t2Bcc") {
+ if ((cond = slice(insn, 25, 22)) >= 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else if (Name == "tBcc") {
+ if ((cond = slice(insn, 11, 8)) == 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else
+ MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ } else {
+ // ARM instructions get their condition field from Inst{31-28}.
+ // We should reject Inst{31-28} = 0b1111 as invalid encoding.
+ if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
+ }
+ }
+ MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ Idx += 2;
+ NumOpsRemaining -= 2;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+
+ // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
+ if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
+ --NumOpsRemaining;
+ }
+
+ if (NumOpsRemaining == 0)
+ return true;
+ else
+ return false;
+}
+
+/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+/// after BuildIt is finished.
+bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
+ uint32_t insn) {
+
+ if (!SP) return Status;
+
+ if (Opcode == ARM::t2IT)
+ Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
+ else if (InITBlock())
+ SP->UpdateIT();
+
+ return Status;
+}
+
+/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
+ unsigned short num)
+ : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
+ unsigned Idx = (unsigned)format;
+ assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
+ Disasm = FuncPtrs[Idx];
+}
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing the allocated memory. Caching can be
+/// performed by the API clients to improve performance.
+ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
+ // For "Unknown format", fail by returning a NULL pointer.
+ if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
+ DEBUG(errs() << "Unknown format\n");
+ return 0;
+ }
+
+ return new ARMBasicMCBuilder(Opcode, Format,
+ ARMInsts[Opcode].getNumOperands());
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst. The immediate
+/// Value has had any PC adjustment made by the caller. If the getOpInfo()
+/// function was set as part of the setupBuilderForSymbolicDisassembly() call
+/// then that function is called to get any symbolic information at the
+/// builder's Address for this instruction. If that returns non-zero then the
+/// symbolic information it returns is used to create an MCExpr and that is
+/// added as an operand to the MCInst. This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
+ uint64_t InstSize,
+ MCInst &MI) {
+ if (!GetOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
+ return false;
+
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert("bad SymbolicOp.VariantKind");
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
new file mode 100644
index 000000000000..a7ba14141c0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -0,0 +1,336 @@
+//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+//
+// The first part defines the enumeration type of ARM instruction format, which
+// specifies the encoding used by the instruction, as well as a helper function
+// to convert the enums to printable char strings.
+//
+// It also contains code to represent the concepts of Builder and DisassembleFP
+// to solve the problem of disassembling an ARM instr.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMDISASSEMBLERCORE_H
+#define ARMDISASSEMBLERCORE_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm-c/Disassembler.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMDisassembler.h"
+
+namespace llvm {
+class MCContext;
+
+class ARMUtils {
+public:
+ static const char *OpcodeName(unsigned Opcode);
+};
+
+/////////////////////////////////////////////////////
+// //
+// Enums and Utilities for ARM Instruction Format //
+// //
+/////////////////////////////////////////////////////
+
+#define ARM_FORMATS \
+ ENTRY(ARM_FORMAT_PSEUDO, 0) \
+ ENTRY(ARM_FORMAT_MULFRM, 1) \
+ ENTRY(ARM_FORMAT_BRFRM, 2) \
+ ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
+ ENTRY(ARM_FORMAT_DPFRM, 4) \
+ ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
+ ENTRY(ARM_FORMAT_LDFRM, 6) \
+ ENTRY(ARM_FORMAT_STFRM, 7) \
+ ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
+ ENTRY(ARM_FORMAT_STMISCFRM, 9) \
+ ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
+ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
+ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
+
+// ARM instruction format specifies the encoding used by the instruction.
+#define ENTRY(n, v) n = v,
+typedef enum {
+ ARM_FORMATS
+ ARM_FORMAT_NA
+} ARMFormat;
+#undef ENTRY
+
+// Converts enum to const char*.
+static inline const char *stringForARMFormat(ARMFormat form) {
+#define ENTRY(n, v) case n: return #n;
+ switch(form) {
+ ARM_FORMATS
+ case ARM_FORMAT_NA:
+ default:
+ return "";
+ }
+#undef ENTRY
+}
+
+/// Expands on the enum definitions from ARMBaseInstrInfo.h.
+/// These are used by the disassembler implementation.
+namespace ARMII {
+ enum {
+ NEONRegMask = 15,
+ GPRRegMask = 15,
+ NEON_RegRdShift = 12,
+ NEON_D_BitShift = 22,
+ NEON_RegRnShift = 16,
+ NEON_N_BitShift = 7,
+ NEON_RegRmShift = 0,
+ NEON_M_BitShift = 5
+ };
+}
+
+/// Utility function for extracting [From, To] bits from a uint32_t.
+static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
+ assert(From < 32 && To < 32 && From >= To);
+ return (Bits >> To) & ((1 << (From - To + 1)) - 1);
+}
+
+/// Utility function for setting [From, To] bits to Val for a uint32_t.
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
+ assert(From < 32 && To < 32 && From >= To);
+ uint32_t Mask = ((1 << (From - To + 1)) - 1);
+ Bits &= ~(Mask << To);
+ Bits |= (Val & Mask) << To;
+}
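+
+// As an illustration: slice(0xE59F1004, 15, 12) extracts the 4-bit field
+// Inst{15-12} and returns 0x1, while setSlice(Bits, 15, 12, 0x3) overwrites
+// that same field with 0b0011.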
+
+// Return an integer result equal to the number of bits of x that are ones.
+static inline uint32_t
+BitCount (uint64_t x)
+{
+ // c accumulates the total bits set in x
+ uint32_t c;
+ for (c = 0; x; ++c)
+ {
+ x &= x - 1; // clear the least significant bit set
+ }
+ return c;
+}
+
+static inline bool
+BitIsSet (const uint64_t value, const uint64_t bit)
+{
+ return (value & (1ull << bit)) != 0;
+}
+
+static inline bool
+BitIsClear (const uint64_t value, const uint64_t bit)
+{
+ return (value & (1ull << bit)) == 0;
+}
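+
+// E.g., BitCount(0xF0) == 4, BitIsSet(0x10, 4) is true, and
+// BitIsClear(0x10, 3) is true.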
+
+/// Various utilities for checking the target specific flags.
+
+/// A unary data processing instruction doesn't have an Rn operand.
+static inline bool isUnaryDP(uint64_t TSFlags) {
+ return (TSFlags & ARMII::UnaryDP);
+}
+
+/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
+static inline bool isNEONDomain(uint64_t TSFlags) {
+ return (TSFlags & ARMII::DomainNEON) ||
+ (TSFlags & ARMII::DomainNEONA8);
+}
+
+/// This four-bit field describes the addressing mode used.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getAddrMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::AddrModeMask);
+}
+
+/// {IndexModePre, IndexModePost}
+/// Only valid for load and store ops.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getIndexMode(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+}
+
+/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
+ return (TSFlags & ARMII::IndexModeMask) != 0;
+}
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+// Builder Object is mostly ignored except in some Thumb disassemble functions.
+typedef ARMBasicMCBuilder *BO;
+
+/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
+/// and builds the MCOperand list upon disassembly. It returns false on failure
+/// or true on success. The number of operands added is updated upon success.
+typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
+
+/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
+/// infrastructure of an MCInst given the Opcode and Format of the instr.
+/// Return NULL if it fails to create/return a proper builder. API clients
+/// are responsible for freeing the allocated memory. Caching can be
+/// performed by the API clients to improve performance.
+extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+
+/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
+/// knows how to build up the MCOperand list.
+class ARMBasicMCBuilder {
+ friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
+ unsigned Opcode;
+ ARMFormat Format;
+ unsigned short NumOps;
+ DisassembleFP Disasm;
+ Session *SP;
+ int Err; // !=0 if the builder encounters some error condition during build.
+
+private:
+ /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+ ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
+
+public:
+ ARMBasicMCBuilder(ARMBasicMCBuilder &B)
+ : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
+ SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
+ Err = 0;
+ }
+
+ virtual ~ARMBasicMCBuilder() {}
+
+ void SetSession(Session *sp) {
+ SP = sp;
+ }
+
+ void SetErr(int ErrCode) {
+ Err = ErrCode;
+ }
+
+ /// DoPredicateOperands - DoPredicateOperands processes the predicate operands
+ /// of some Thumb instructions which come before the reglist operands. It
+ /// returns true if the two predicate operands have been processed.
+ bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+ /// the possible Predicate and SBitModifier, to build the remaining MCOperand
+ /// constituents.
+ bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOpsRemaning);
+
+ /// InITBlock - InITBlock returns true if we are inside an IT block.
+ bool InITBlock() {
+ if (SP)
+ return SP->ITCounter > 0;
+
+ return false;
+ }
+
+ /// Build - Build delegates to BuildIt to perform the heavy lifting. After
+ /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
+ virtual bool Build(MCInst &MI, uint32_t insn) {
+ bool Status = BuildIt(MI, insn);
+ return RunBuildAfterHook(Status, MI, insn);
+ }
+
+ /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+ /// The general idea is to set the Opcode for the MCInst, followed by adding
+ /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
+ /// to the Format-specific disassemble function for disassembly, followed by
+ /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
+ /// which follow the Dst/Src Operands.
+ virtual bool BuildIt(MCInst &MI, uint32_t insn);
+
+ /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
+ /// after BuildIt is finished.
+ virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
+
+private:
+ /// Get condition of the current IT instruction.
+ unsigned GetITCond() {
+ assert(SP);
+ return slice(SP->ITState, 7, 4);
+ }
+
+private:
+ //
+ // Hooks for symbolic disassembly via the public 'C' interface.
+ //
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The pointer to the block of symbolic information for above call back.
+ void *DisInfo;
+ // The assembly context for creating symbols and MCExprs in place of
+ // immediate operands when there is symbolic information.
+ MCContext *Ctx;
+ // The address of the instruction being disassembled.
+ uint64_t Address;
+
+public:
+ void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
+ void *disInfo, MCContext *ctx,
+ uint64_t address) {
+ GetOpInfo = getOpInfo;
+ DisInfo = disInfo;
+ Ctx = ctx;
+ Address = address;
+ }
+
+ uint64_t getBuilderAddress() const { return Address; }
+
+ /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+ /// operand in place of the immediate Value in the MCInst. The immediate
+ /// Value has had any PC adjustment made by the caller. If the getOpInfo()
+ /// function was set as part of the setupBuilderForSymbolicDisassembly() call
+ /// then that function is called to get any symbolic information at the
+ /// builder's Address for this instruction. If that returns non-zero then the
+ /// symbolic information it returns is used to create an MCExpr and that is
+ /// added as an operand to the MCInst. This function returns true if it adds
+ /// an operand to the MCInst and false otherwise.
+ bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
+
+};
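+
+// Minimal usage sketch of the builder API above; the driver code is not shown
+// in this header, and the Opcode/Format/insn values as well as TheSession are
+// hypothetical placeholders:
+//
+//   ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+//   if (Builder) {
+//     Builder->SetSession(&TheSession);   // Session is defined elsewhere
+//     MCInst MI;
+//     bool OK = Builder->Build(MI, insn); // BuildIt + RunBuildAfterHook
+//     delete Builder;                     // API clients free the builder
+//   }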
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
new file mode 100644
index 000000000000..834c6f65295d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -0,0 +1,2459 @@
+//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the ARM Disassembler.
+// It contains code for disassembling a Thumb instr. It is to be included by
+// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
+// function which acts as the dispatcher to disassemble a Thumb instruction.
+//
+//===----------------------------------------------------------------------===//
+
+///////////////////////////////
+// //
+// Utility Functions //
+// //
+///////////////////////////////
+
+// Utilities for 16-bit Thumb instructions.
+/*
+15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ [ tRt ]
+ [ tRm ] [ tRn ] [ tRd ]
+ D [ Rm ] [ Rd ]
+
+ [ imm3]
+ [ imm5 ]
+ i [ imm5 ]
+ [ imm7 ]
+ [ imm8 ]
+ [ imm11 ]
+
+ [ cond ]
+*/
+
+// Extract tRt: Inst{10-8}.
+static inline unsigned getT1tRt(uint32_t insn) {
+ return slice(insn, 10, 8);
+}
+
+// Extract tRm: Inst{8-6}.
+static inline unsigned getT1tRm(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract tRn: Inst{5-3}.
+static inline unsigned getT1tRn(uint32_t insn) {
+ return slice(insn, 5, 3);
+}
+
+// Extract tRd: Inst{2-0}.
+static inline unsigned getT1tRd(uint32_t insn) {
+ return slice(insn, 2, 0);
+}
+
+// Extract [D:Rd]: Inst{7:2-0}.
+static inline unsigned getT1Rd(uint32_t insn) {
+ return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
+}
+
+// Extract Rm: Inst{6-3}.
+static inline unsigned getT1Rm(uint32_t insn) {
+ return slice(insn, 6, 3);
+}
+
+// Extract imm3: Inst{8-6}.
+static inline unsigned getT1Imm3(uint32_t insn) {
+ return slice(insn, 8, 6);
+}
+
+// Extract imm5: Inst{10-6}.
+static inline unsigned getT1Imm5(uint32_t insn) {
+ return slice(insn, 10, 6);
+}
+
+// Extract i:imm5: Inst{9:7-3}.
+static inline unsigned getT1Imm6(uint32_t insn) {
+ return slice(insn, 9, 9) << 5 | slice(insn, 7, 3);
+}
+
+// Extract imm7: Inst{6-0}.
+static inline unsigned getT1Imm7(uint32_t insn) {
+ return slice(insn, 6, 0);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getT1Imm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// Extract imm11: Inst{10-0}.
+static inline unsigned getT1Imm11(uint32_t insn) {
+ return slice(insn, 10, 0);
+}
+
+// Extract cond: Inst{11-8}.
+static inline unsigned getT1Cond(uint32_t insn) {
+ return slice(insn, 11, 8);
+}
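+
+// Worked example: decoding the 16-bit Thumb word 0x1C52 (ADDS r2, r2, #1; the
+// value is chosen purely for illustration) with the extractors above gives
+//   getT1Imm3(0x1C52) == 1   // Inst{8-6}
+//   getT1tRn(0x1C52)  == 2   // Inst{5-3}
+//   getT1tRd(0x1C52)  == 2   // Inst{2-0}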
+
+static inline bool IsGPR(unsigned RegClass) {
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
+}
+
+// Utilities for 32-bit Thumb instructions.
+
+static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
+
+// Extract imm4: Inst{19-16}.
+static inline unsigned getImm4(uint32_t insn) {
+ return slice(insn, 19, 16);
+}
+
+// Extract imm3: Inst{14-12}.
+static inline unsigned getImm3(uint32_t insn) {
+ return slice(insn, 14, 12);
+}
+
+// Extract imm8: Inst{7-0}.
+static inline unsigned getImm8(uint32_t insn) {
+ return slice(insn, 7, 0);
+}
+
+// A8.6.61 LDRB (immediate, Thumb) and friends
+// +/-: Inst{9}
+// imm8: Inst{7-0}
+static inline int decodeImm8(uint32_t insn) {
+ int Offset = getImm8(insn);
+ return slice(insn, 9, 9) ? Offset : -Offset;
+}
+
+// Extract imm12: Inst{11-0}.
+static inline unsigned getImm12(uint32_t insn) {
+ return slice(insn, 11, 0);
+}
+
+// A8.6.63 LDRB (literal) and friends
+// +/-: Inst{23}
+// imm12: Inst{11-0}
+static inline int decodeImm12(uint32_t insn) {
+ int Offset = getImm12(insn);
+ return slice(insn, 23, 23) ? Offset : -Offset;
+}
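+
+// Sign-handling examples for the decoders above (illustrative field values):
+// with Inst{23} == 0 and imm12 == 0x004, decodeImm12() returns -4; with
+// Inst{9} == 1 and imm8 == 0x10, decodeImm8() returns +16.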
+
+// Extract imm2: Inst{7-6}.
+static inline unsigned getImm2(uint32_t insn) {
+ return slice(insn, 7, 6);
+}
+
+// For BFI, BFC, t2SBFX, and t2UBFX.
+// Extract lsb: Inst{14-12:7-6}.
+static inline unsigned getLsb(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// For BFI and BFC.
+// Extract msb: Inst{4-0}.
+static inline unsigned getMsb(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2SBFX and t2UBFX.
+// Extract widthminus1: Inst{4-0}.
+static inline unsigned getWidthMinus1(uint32_t insn) {
+ return slice(insn, 4, 0);
+}
+
+// For t2ADDri12 and t2SUBri12.
+// imm12 = i:imm3:imm8;
+static inline unsigned getIImm3Imm8(uint32_t insn) {
+ return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
+}
+
+// For t2MOVi16 and t2MOVTi16.
+// imm16 = imm4:i:imm3:imm8;
+static inline unsigned getImm16(uint32_t insn) {
+ return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
+ getImm3(insn) << 8 | getImm8(insn);
+}
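+
+// Composition example for getImm16() (illustrative field values): with
+// imm4 = 0xA (Inst{19-16}), i = 1 (Inst{26}), imm3 = 0x5 (Inst{14-12}) and
+// imm8 = 0x3C (Inst{7-0}), the result is
+//   0xA << 12 | 1 << 11 | 0x5 << 8 | 0x3C == 0xAD3C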
+
+// Inst{5-4} encodes the shift type.
+static inline unsigned getShiftTypeBits(uint32_t insn) {
+ return slice(insn, 5, 4);
+}
+
+// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
+static inline unsigned getShiftAmtBits(uint32_t insn) {
+ return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// A8.6.17 BFC
+// Encoding T1 ARMv6T2, ARMv7
+// LLVM-specific encoding for #<lsb> and #<width>
+static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
+ uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
+ uint32_t msb = getMsb(insn);
+ uint32_t Val = 0;
+ if (msb < lsb) {
+ DEBUG(errs() << "Encoding error: msb < lsb\n");
+ return false;
+ }
+ for (uint32_t i = lsb; i <= msb; ++i)
+ Val |= (1 << i);
+ mask = ~Val;
+ return true;
+}
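+
+// Example for the inverted-mask helper above (illustrative fields): with
+// lsb = 4 and msb = 7, Val becomes 0x000000F0 and mask is set to
+// ~0xF0 == 0xFFFFFF0F, i.e. bits [7:4] form the field affected by BFC/BFI.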
+
+// A8.4 Shifts applied to a register
+// A8.4.1 Constant shifts
+// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
+//
+// decodeImmShift() returns the shift amount and the shift opcode.
+// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
+static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
+ ARM_AM::ShiftOpc &ShOp) {
+
+ assert(imm5 < 32 && "Invalid imm5 argument");
+ switch (bits2) {
+ default: assert(0 && "No such value");
+ case 0:
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
+ return imm5;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 2:
+ ShOp = ARM_AM::asr;
+ return (imm5 == 0 ? 32 : imm5);
+ case 3:
+ ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
+ return (imm5 == 0 ? 1 : imm5);
+ }
+}
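+
+// Examples for decodeImmShift() (illustrative inputs): bits2 = 1, imm5 = 0
+// yields ShOp = ARM_AM::lsr with a shift amount of 32 (LSR #32 is encoded as
+// an imm5 of 0); bits2 = 3, imm5 = 0 yields ShOp = ARM_AM::rrx with amount 1.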
+
+// A6.3.2 Modified immediate constants in Thumb instructions
+//
+// ThumbExpandImm() returns the modified immediate constant given an imm12 for
+// Thumb data-processing instructions with modified immediate.
+// See also A6.3.1 Data-processing (modified immediate).
+static inline unsigned ThumbExpandImm(unsigned imm12) {
+ assert(imm12 <= 0xFFF && "Invalid imm12 argument");
+
+ // If the leading two bits are 0b00, the modified immediate constant is
+ // obtained by splatting the low 8 bits into the first byte, every other byte,
+ // or every byte of a 32-bit value.
+ //
+ // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
+ // performed.
+
+ if (slice(imm12, 11, 10) == 0) {
+ unsigned short control = slice(imm12, 9, 8);
+ unsigned imm8 = slice(imm12, 7, 0);
+ switch (control) {
+ default:
+ assert(0 && "No such value");
+ return 0;
+ case 0:
+ return imm8;
+ case 1:
+ return imm8 << 16 | imm8;
+ case 2:
+ return imm8 << 24 | imm8 << 8;
+ case 3:
+ return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
+ }
+ } else {
+ // A rotate is required.
+ unsigned Val = 1 << 7 | slice(imm12, 6, 0);
+ unsigned Amt = slice(imm12, 11, 7);
+ return ARM_AM::rotr32(Val, Amt);
+ }
+}
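+
+// Worked examples for ThumbExpandImm() (illustrative imm12 values):
+//   imm12 = 0x0AB (control = 0b00): returns 0x000000AB
+//   imm12 = 0x3AB (control = 0b11): returns 0xABABABAB
+//   imm12 = 0x8FF (rotate path):    Val = 0xFF, Amt = 17, so the result is
+//                                   rotr32(0xFF, 17) == 0x007F8000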
+
+static inline int decodeImm32_B_EncodingT3(uint32_t insn) {
+ bool S = slice(insn, 26, 26);
+ bool J1 = slice(insn, 13, 13);
+ bool J2 = slice(insn, 11, 11);
+ unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm21 |= 1 << 20;
+ if (J2) Imm21 |= 1 << 19;
+ if (J1) Imm21 |= 1 << 18;
+
+ return SignExtend32<21>(Imm21);
+}
+
+static inline int decodeImm32_B_EncodingT4(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BL(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+static inline int decodeImm32_BLX(uint32_t insn) {
+ unsigned S = slice(insn, 26, 26);
+ bool I1 = slice(insn, 13, 13) == S;
+ bool I2 = slice(insn, 11, 11) == S;
+ unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2;
+ if (S) Imm25 |= 1 << 24;
+ if (I1) Imm25 |= 1 << 23;
+ if (I2) Imm25 |= 1 << 22;
+
+ return SignExtend32<25>(Imm25);
+}
+
+// See, for example, A8.6.221 SXTAB16.
+static inline unsigned decodeRotate(uint32_t insn) {
+ unsigned rotate = slice(insn, 5, 4);
+ return rotate << 3;
+}
+
+///////////////////////////////////////////////
+// //
+// Thumb1 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+// See "Utilities for 16-bit Thumb instructions" for register naming convention.
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare
+//
+// shift immediate: tRd CPSR tRn imm5
+// add/sub register: tRd CPSR tRn tRm
+// add/sub 3-bit immediate: tRd CPSR tRn imm3
+// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8
+// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov)
+//
+// Special case:
+// tMOVSr: tRd tRn
+static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && "Invalid arguments");
+
+ bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3);
+
+ // Using Rt implies that the immediate operand is imm8.
+ bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 ||
+ Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8);
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // Check whether the next operand to be added is a Thumb1 Register.
+ assert(OpIdx < NumOps && "More operands expected");
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // For UseRt, the reg operand is tied to the first reg operand.
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::tGPRRegClassID,
+ UseRt ? getT1tRt(insn) : getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ // Special case for tMOVSr.
+ if (OpIdx == NumOps)
+ return true;
+
+ // The next available operand is either a reg operand or an imm operand.
+ if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Three register operand instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ unsigned Imm = 0;
+ if (UseRt)
+ Imm = getT1Imm8(insn);
+ else if (Imm3)
+ Imm = getT1Imm3(insn);
+ else {
+ Imm = getT1Imm5(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
+ getImmShiftSE(ShOp, Imm);
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ }
+ ++OpIdx;
+
+ return true;
+}
+
+// A6.2.2 Data-processing
+//
+// tCMPr, tTST, tCMN: tRd tRn
+// tMVN, tRSB: tRd CPSR tRn
+// Others: tRd CPSR tRd(TIED_TO) tRn
+static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass == ARM::CCRRegClassID
+ || OpInfo[1].RegClass == ARM::tGPRRegClassID)
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ ++OpIdx;
+
+ // Check whether the next operand to be added is a CCR Register.
+ if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
+ assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
+ MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
+ ++OpIdx;
+ }
+
+ // We have either { tRd(TIED_TO), tRn } or { tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // Process possible next reg operand.
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
+ // Add tRn operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.2.3 Special data instructions and branch and exchange
+//
+// tADDhirr: Rd Rd(TIED_TO) Rm
+// tCMPhir: Rd Rm
+// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn
+// tBX: Rm
+// tBX_RET: 0 operand
+// tBX_RET_vararg: Rm
+// tBLXr_r9: Rm
+// tBRIND: Rm
+static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // tBX_RET has 0 operand.
+ if (NumOps == 0)
+ return true;
+
+ // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm.
+ if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) {
+ if (Opcode == ARM::tBLXr_r9) {
+ // Handling the two predicate operands before the reg operand.
+ if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ return false;
+ NumOpsAdded += 2;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ getT1Rm(insn))));
+ NumOpsAdded += 1;
+
+ if (Opcode == ARM::tBX) {
+ // Handling the two predicate operands after the reg operand.
+ if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ return false;
+ NumOpsAdded += 2;
+ }
+
+ return true;
+ }
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ // Add the destination operand.
+ unsigned RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rd(insn)
+ : getT1tRd(insn))));
+ ++OpIdx;
+
+ // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining.
+ // Process the TIED_TO operand first.
+
+ assert(OpIdx < NumOps && "More operands expected");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ }
+
+ // The next reg operand is either Rm or tRn.
+ assert(OpIdx < NumOps && "More operands expected");
+ RegClass = OpInfo[OpIdx].RegClass;
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RegClass,
+ IsGPR(RegClass) ? getT1Rm(insn)
+ : getT1tRn(insn))));
+ ++OpIdx;
+
+ return true;
+}
+
+// A8.6.59 LDR (literal)
+//
+// tLDRpci: tRt imm8*4
+static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+
+ // And the (imm8 << 2) operand.
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// Thumb specific addressing modes (see ARMInstrThumb.td):
+//
+// t_addrmode_rr := reg + reg
+//
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+// t_addrmode_sp := sp + imm8 * 4
+//
+
+// A8.6.63 LDRB (literal)
+// A8.6.79 LDRSB (literal)
+// A8.6.75 LDRH (literal)
+// A8.6.83 LDRSH (literal)
+// A8.6.59 LDR (literal)
+//
+// These instrs calculate an address from the PC value and an immediate offset.
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass < 0 &&
+ "Expect >= 2 operands, first as reg, and second as imm operand");
+
+ // Build the register operand, followed by the (+/-)imm12 immediate.
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register (reg|imm): tRd tRn imm5|tRm
+// Load Register Signed Byte|Halfword: tRd tRn tRm
+static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass == ARM::tGPRRegClassID
+ && OpInfo[1].RegClass == ARM::tGPRRegClassID
+ && "Expect >= 2 operands and first two as thumb reg operands");
+
+ // Add the destination reg and the base reg.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ OpIdx = 2;
+
+ // We have either { imm5 } or { tRm } remaining.
+ // Note that STR/LDR (register) should skip the imm5 offset operand for
+ // t_addrmode_s[1|2|4].
+
+ assert(OpIdx < NumOps && "More operands expected");
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
+ !OpInfo[OpIdx].isOptionalDef()) {
+ // Table A6-5 16-bit Thumb Load/store instructions
+ // opA = 0b0101 for STR/LDR (register) and friends.
+ // Otherwise, we have STR/LDR (immediate) and friends.
+ assert(opA != 5 && "Immediate operand expected for this opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
+ ++OpIdx;
+ } else {
+ // The next reg operand is tRm, the offset.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
+ ++OpIdx;
+ }
+ return true;
+}
+
+// A6.2.4 Load/store single data item
+//
+// Load/Store Register SP relative: tRt ARM::SP imm8
+static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
+ && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.10 ADR
+//
+// tADDrPCi: tRt imm8
+static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 &&
+ !OpInfo[1].isPredicate() &&
+ !OpInfo[1].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 2;
+ return true;
+}
+
+// Table A6-1 16-bit Thumb instruction encoding
+// A8.6.8 ADD (SP plus immediate)
+//
+// tADDrSPi: tRt ARM::SP imm8
+static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ (OpInfo[2].RegClass < 0 &&
+ !OpInfo[2].isPredicate() &&
+ !OpInfo[2].isOptionalDef())
+ && "Invalid arguments");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRt(insn))));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
+ NumOpsAdded = 3;
+ return true;
+}
+
+// tPUSH, tPOP: Pred-Imm Pred-CCR register_list
+//
+// where register_list = low registers + [lr] for PUSH or
+// low registers + [pc] for POP
+//
+// "low registers" is specified by Inst{7-0}
+// lr|pc is specified by Inst{8}
+static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ OpIdx += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
+ | slice(insn, 7, 0);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i) {
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++OpIdx;
+ }
+ }
+
+ return true;
+}
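+
+// Reglist example for the function above (illustrative encoding): a tPUSH
+// with Inst{8} == 1 and Inst{7-0} == 0xF0 (push {r4-r7, lr}) gives
+// RegListBits == (1 << 14) | 0xF0, so the loop adds R4-R7 and LR after the
+// two predicate operands.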
+
+// A6.2.5 Miscellaneous 16-bit instructions
+// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP.
+//
+// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7
+// t2IT: firstcond=Inst{7-4} mask=Inst{3-0}
+// tCBNZ, tCBZ: tRd imm6*2
+// tBKPT: imm8
+// tNOP, tSEV, tYIELD, tWFE, tWFI:
+// no operand (except predicate pair)
+// tSETENDBE, tSETENDLE:
+// no operand
+// Others: tRd tRn
+static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
+ return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ // Predicate operands are handled elsewhere.
+ if (NumOps == 2 &&
+ OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ return true;
+ }
+
+ if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) {
+ // Special case handling for tADDspi and tSUBspi.
+ // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate)
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateReg(ARM::SP));
+ MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn)));
+ NumOpsAdded = 3;
+ return true;
+ }
+
+ if (Opcode == ARM::t2IT) {
+ // Special case handling for If-Then.
+ // A8.6.50 IT
+ // Tag the (firstcond[0] bit << 4) along with mask.
+
+ // firstcond
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
+
+ // firstcond[0] and mask
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ if (Opcode == ARM::tBKPT) {
+ MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // CPS has a singleton $opt operand that contains the following information:
+ // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+ // but in Thumb it is 0 as enable and 1 as disable, so map it to the ARM
+ // encoding. The second operand gets the AIF flags from Inst{2-0}.
+ if (Opcode == ARM::tCPS) {
+ MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+ NumOpsAdded = 2;
+ return true;
+ }
+
+ assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ && "Expect >=2 operands");
+
+ // Add the destination operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRd(insn))));
+
+ if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
+ // Two register instructions.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRn(insn))));
+ } else {
+ // CBNZ, CBZ
+ assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
+ }
+
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+// A8.6.53 LDM / LDMIA
+// A8.6.189 STM / STMIA
+//
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+ assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+ Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
+
+ unsigned tRt = getT1tRt(insn);
+ NumOpsAdded = 0;
+
+ // WB register, if necessary.
+ if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ tRt)));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = slice(insn, 7, 0);
+ if (BitCount(RegListBits) < 1) {
+ DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
+ return false;
+ }
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+}
+
+// A8.6.16 B Encoding T1
+// cond = Inst{11-8} & imm8 = Inst{7-0}
+// imm32 = SignExtend(imm8:'0', 32)
+//
+// tBcc: offset Pred-Imm Pred-CCR
+// tSVC: imm8 Pred-Imm Pred-CCR
+// tTRAP: 0 operand (early return)
+static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ if (Opcode == ARM::tTRAP)
+ return true;
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
+ OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
+ && "Exactly 3 operands expected");
+
+ unsigned Imm8 = getT1Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(
+ Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1)
+ : (int)Imm8));
+
+ // Predicate operands are handled later by
+ // ARMBasicMCBuilder::TryPredicateAndSBitModifier().
+ // But note that for tBcc, if cond = '1110' then UNDEFINED.
+ if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
+ DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
+ return false;
+ }
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// A8.6.16 B Encoding T2
+// imm11 = Inst{10-0}
+// imm32 = SignExtend(imm11:'0', 32)
+//
+// tB: offset
+static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
+
+ unsigned Imm11 = getT1Imm11(insn);
+
+ MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
+
+ NumOpsAdded = 1;
+
+ return true;
+}
+
+// See A6.2 16-bit Thumb instruction encoding for instruction classes
+// corresponding to op.
+//
+// Table A6-1 16-bit Thumb instruction encoding (abridged)
+// op Instruction or instruction class
+// ------ --------------------------------------------------------------------
+// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000 Data-processing on page A6-8
+// 010001 Special data instructions and branch and exchange on page A6-9
+// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx Load/store single data item on page A6-10
+// 011xxx
+// 100xxx
+// 10100x Generate PC-relative address, see ADR on page A8-32
+// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
+// page A8-28
+// 1011xx Miscellaneous 16-bit instructions on page A6-11
+// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110
+// 1101xx Conditional branch, and Supervisor Call on page A6-13
+// 11100x Unconditional Branch, see B on page A8-44
+//
+static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned op1 = slice(op, 5, 4);
+ unsigned op2 = slice(op, 3, 2);
+ unsigned op3 = slice(op, 1, 0);
+ unsigned opA = slice(op, 5, 2);
+ switch (op1) {
+ case 0:
+ // A6.2.1 Shift (immediate), add, subtract, move, and compare
+ return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ switch (op2) {
+ case 0:
+ switch (op3) {
+ case 0:
+ // A6.2.2 Data-processing
+ return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 1:
+ // A6.2.3 Special data instructions and branch and exchange
+ return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ default:
+ // A8.6.59 LDR (literal)
+ return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ default:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ break;
+ }
+ break;
+ case 2:
+ switch (op2) {
+ case 0:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 1:
+ // A6.2.4 Load/store single data item
+ return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ if (op3 <= 1) {
+ // A8.6.10 ADR
+ return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ } else {
+ // A8.6.8 ADD (SP plus immediate)
+ return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ default:
+ // A6.2.5 Miscellaneous 16-bit instructions
+ return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ break;
+ case 3:
+ switch (op2) {
+ case 0:
+ if (op3 <= 1) {
+ // A8.6.189 STM / STMIA / STMEA
+ return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A8.6.53 LDM / LDMIA / LDMFD
+ return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ case 1:
+ // A6.2.6 Conditional branch, and Supervisor Call
+ return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ case 2:
+ // Unconditional Branch, see B on page A8-44
+ return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+ break;
+ default:
+ assert(0 && "Unreachable code");
+ break;
+ }
+
+ return false;
+}
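+
+// Routing example for the dispatcher above (assuming, per Table A6-1, that
+// the op argument carries Inst{15-10}): for the 16-bit word 0x1C52 the op
+// field is 0b000111, so op1 == 0 and control goes to
+// DisassembleThumb1General(), the shift/add/subtract/move/compare class.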
+
+///////////////////////////////////////////////
+// //
+// Thumb2 instruction disassembly functions. //
+// //
+///////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////
+// //
+// Note: the register naming follows the ARM convention! //
+// //
+///////////////////////////////////////////////////////////
+
+static inline bool Thumb2SRSOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2SRSDBW: case ARM::t2SRSDB:
+ case ARM::t2SRSIAW: case ARM::t2SRSIA:
+ return true;
+ }
+}
+
+static inline bool Thumb2RFEOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2RFEDBW: case ARM::t2RFEDB:
+ case ARM::t2RFEIAW: case ARM::t2RFEIA:
+ return true;
+ }
+}
+
+// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0}
+static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
+ NumOpsAdded = 1;
+ return true;
+}
+
+// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
+static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ unsigned Rn = decodeRn(insn);
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
+ NumOpsAdded = 1;
+ return true;
+}
+
+static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (Thumb2SRSOpcode(Opcode))
+ return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
+
+ if (Thumb2RFEOpcode(Opcode))
+ return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
+ Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
+ Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
+ && "Unexpected opcode");
+ assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
+
+ NumOpsAdded = 0;
+
+ unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
+
+ // Writeback to base.
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(Base));
+ ++NumOpsAdded;
+
+ // Handling the two predicate operands before the reglist.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+
+ unsigned RegListBits = insn & ((1 << 16) - 1);
+
+ // Fill the variadic part of reglist.
+ for (unsigned i = 0; i < 16; ++i)
+ if ((RegListBits >> i) & 1) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ i)));
+ ++NumOpsAdded;
+ }
+
+ return true;
+}
+
+// t2LDREX: Rd Rn
+// t2LDREXD: Rd Rs Rn
+// t2LDREXB, t2LDREXH: Rd Rn
+// t2STREX: Rs Rd Rn
+// t2STREXD: Rm Rd Rs Rn
+// t2STREXB, t2STREXH: Rm Rd Rn
+static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && OpInfo[0].RegClass > 0
+ && OpInfo[1].RegClass > 0
+ && "Expect >=2 operands and first two as reg operands");
+
+ bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
+ bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
+ bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
+
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
+ unsigned Rd = decodeRm(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+ if (isStore) {
+ // if d == n || d == t then UNPREDICTABLE
+ // if d == n || d == t || d == t2 then UNPREDICTABLE
+ if (isDW) {
+ if (Rd == Rn || Rd == Rt || Rd == Rt2) {
+ DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (isSW) {
+ if (Rt2 == Rn || Rt2 == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (Rd == Rn || Rd == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ }
+ } else {
+ // Load
+ // A8.6.71 LDREXD
+ // if t == t2 then UNPREDICTABLE
+ if (isDW && Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
+ // Add the destination operand for store.
+ if (isStore) {
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ isSW ? Rt2 : Rd)));
+ ++OpIdx;
+ }
+
+ // Source operand for store and destination operand for load.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rt)));
+ ++OpIdx;
+
+ // Thumb2 doubleword complication: an extra source/destination operand.
+ if (isDW) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ Rt2)));
+ ++OpIdx;
+ }
+
+ // Finally add the pointer operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rn)));
+ ++OpIdx;
+
+ return true;
+}
+
+// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
+// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
+// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
+//
+// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
+// disassembly only and do not have a tied_to writeback base register operand.
+static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 4
+ && OpInfo[0].RegClass > 0
+ && OpInfo[0].RegClass == OpInfo[1].RegClass
+ && OpInfo[2].RegClass > 0
+ && OpInfo[3].RegClass < 0
+ && "Expect >= 4 operands and first 3 as reg operands");
+
+ // Thumb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1).
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+
+ // A8.6.67 LDRD (literal) has its W bit as (0).
+ if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) {
+ if (Rn == 15 && slice(insn, 21, 21) != 0)
+ return false;
+ } else {
+ // For Dual Store, PC cannot be used as the base register.
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ if (Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
+ if (Rn == Rt || Rn == Rt2) {
+ DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
+ // Add the <Rt> <Rt2> operands.
+ unsigned RegClassPair = OpInfo[0].RegClass;
+ unsigned RegClassBase = OpInfo[2].RegClass;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
+ decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
+ decodeRs(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
+ decodeRn(insn))));
+
+ // Finally add (+/-)imm8*4, depending on the U bit.
+ int Offset = getImm8(insn) * 4;
+ if (getUBit(insn) == 0)
+ Offset = -Offset;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ NumOpsAdded = 4;
+
+ return true;
+}
+
+// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
+static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ assert(NumOps >= 2 && "Expect >= 2 operands");
+
+ // The generic version of TBB/TBH needs a base register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ // Add the index register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ NumOpsAdded = 2;
+
+ return true;
+}
+
+static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2MOVCClsl: case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr: case ARM::t2MOVCCror:
+ case ARM::t2LSLri: case ARM::t2LSRri:
+ case ARM::t2ASRri: case ARM::t2RORri:
+ return true;
+ }
+}
+
+// A6.3.11 Data-processing (shifted register)
+//
+// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm
+// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm
+// Three register operands: Rs Rn Rm
+// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm
+//
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms: (Rm, ConstantShiftSpecifier).
+// Constant shift specifier: Imm = (ShOp | ShAmt<<3).
+//
+// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which
+// only require two register operands: Rd, Rm in ARM Reference Manual terms, and
+// nothing else, because the shift amount is already specified.
+// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
+static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ // Special case handling.
+ if (Opcode == ARM::t2BR_JT) {
+ assert(NumOps == 4
+ && OpInfo[0].RegClass == ARM::GPRRegClassID
+ && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
+ && "Exactly 4 operands expect and first two as reg operands");
+ // Only need to populate the src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ MI.addOperand(MCOperand::CreateReg(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ MI.addOperand(MCOperand::CreateImm(0));
+ NumOpsAdded = 4;
+ return true;
+ }
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first two as reg operands");
+
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the constant shift specifier.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, OpInfo[0].RegClass,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ ++OpIdx;
+ } else if (!NoDstReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
+ decodeRn(insn))));
+ ++OpIdx;
+ } else {
+ DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
+ return false;
+ }
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (NumOps == OpIdx)
+ return true;
+
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()) {
+
+ if (Thumb2ShiftOpcode(Opcode)) {
+ unsigned Imm = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
+ getImmShiftSE(ShOp, Imm);
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ } else {
+ // Build the constant shift specifier operand.
+ unsigned bits2 = getShiftTypeBits(insn);
+ unsigned imm5 = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
+ unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ }
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.1 Data-processing (modified immediate)
+//
+// Two register operands: Rs Rn ModImm
+// One register operand (Rs=0b1111 no explicit dest reg): Rn ModImm
+// One register operand (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
+//
+// ModImm = ThumbExpandImm(i:imm3:imm8)
+static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+ bool NoDstReg = (decodeRs(insn) == 0xF);
+
+ // Build the register operands, followed by the modified immediate.
+
+ MI.addOperand(MCOperand::CreateReg(
+ getRegisterEnum(B, RdRegClassID,
+ NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ if (NoDstReg) {
+ DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
+ return false;
+ }
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add second reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
+ ++OpIdx;
+ }
+
+ // The modified immediate operand should come next.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // i:imm3:imm8
+ // A6.3.2 Modified immediate constants in Thumb instructions
+ unsigned imm12 = getIImm3Imm8(insn);
+ MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
+ ++OpIdx;
+
+ return true;
+}
+
+static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
+// A6.3.3 Data-processing (plain binary immediate)
+//
+// o t2ADDri12, t2SUBri12: Rs Rn imm12
+// o t2LEApcrel (ADR): Rs imm12
+// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
+// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
+// o t2MOVi16: Rs imm16
+// o t2MOVTi16: Rs imm16
+// o t2SBFX (SBFX): Rs Rn lsb width
+// o t2UBFX (UBFX): Rs Rn lsb width
+// o t2BFI (BFI): Rs Rn lsb width
+static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
+ && "Expect >= 2 operands and first one as reg operand");
+
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
+
+ // Build the register operand(s), followed by the immediate(s).
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (TwoReg) {
+ assert(NumOps >= 3 && "Expect >= 3 operands");
+ int Idx;
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
+ // Process tied_to operand constraint.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add src reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
+ ++OpIdx;
+ }
+
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ // Pre-increment OpIdx.
+ ++OpIdx;
+
+ if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
+ || Opcode == ARM::t2LEApcrel)
+ MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
+ else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
+ if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
+ MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
+ } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
+ uint32_t mask = 0;
+ if (getBitfieldInvMask(insn, mask))
+ MI.addOperand(MCOperand::CreateImm(mask));
+ else
+ return false;
+ } else {
+ // Handle the case of: lsb width
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
+ MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.4 Table A6-15 Miscellaneous control instructions
+// A8.6.41 DMB
+// A8.6.42 DSB
+// A8.6.49 ISB
+static inline bool t2MiscCtrlInstr(uint32_t insn) {
+ if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 &&
+ slice(insn, 12, 12) == 0)
+ return true;
+
+ return false;
+}
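+
+// Illustrative check (encoding value taken as an example): for DMB SY,
+// insn = 0xf3bf8f5f, so Inst{31-20} = 0xf3b, Inst{15-14} = 0b10 and
+// Inst{12} = 0, and t2MiscCtrlInstr() returns true.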
+
+// A6.3.4 Branches and miscellaneous control
+//
+// A8.6.16 B
+// Branches: t2B, t2Bcc -> imm operand
+//
+// Branches: t2TPsoft -> no operand
+//
+// A8.6.23 BL, BLX (immediate)
+// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand
+//
+// A8.6.26
+// t2BXJ -> Rn
+//
+// Miscellaneous control:
+// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
+//
+// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
+// -> no operand (except pred-imm pred-ccr)
+//
+// t2DBG -> imm4 = Inst{3-0}
+//
+// t2MRS/t2MRSsys -> Rs
+// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
+// t2SMC -> imm4 = Inst{19-16}
+static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ if (NumOps == 0)
+ return true;
+
+ if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (t2MiscCtrlInstr(insn))
+ return true;
+
+ switch (Opcode) {
+ case ARM::t2CLREX:
+ case ARM::t2NOP:
+ case ARM::t2YIELD:
+ case ARM::t2WFE:
+ case ARM::t2WFI:
+ case ARM::t2SEV:
+ return true;
+ default:
+ break;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::t2CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // DBG has its option specified in Inst{3-0}.
+ if (Opcode == ARM::t2DBG) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+ // MRS and MRSsys take one GPR reg Rs.
+ if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRs(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+ // BXJ takes one GPR reg Rn.
+ if (Opcode == ARM::t2BXJ) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 1;
+ return true;
+ }
+  // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::t2MSR) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+ slice(insn, 11, 8) /* Special Reg */));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ NumOpsAdded = 2;
+ return true;
+ }
+  // SMC takes imm4.
+ if (Opcode == ARM::t2SMC) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
+ NumOpsAdded = 1;
+ return true;
+ }
+
+  // Some instructions have predicate operands first, before the immediate.
+ if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
+ // Handling the two predicate operands before the imm operand.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ NumOpsAdded += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+ }
+
+ // Add the imm operand.
+ int Offset = 0;
+
+ switch (Opcode) {
+ default:
+ assert(0 && "Unexpected opcode");
+ return false;
+ case ARM::t2B:
+ Offset = decodeImm32_B_EncodingT4(insn);
+ break;
+ case ARM::t2Bcc:
+ Offset = decodeImm32_B_EncodingT3(insn);
+ break;
+ case ARM::tBLr9:
+ Offset = decodeImm32_BL(insn);
+ break;
+ case ARM::tBLXi_r9:
+ Offset = decodeImm32_BLX(insn);
+ break;
+ }
+
+ if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Offset));
+
+ // This is an increment as some predicate operands may have been added first.
+ NumOpsAdded += 1;
+
+ return true;
+}
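+
+// Illustrative branch-target computation (addresses made up): if the builder
+// address is 0x8000 and decodeImm32_B_EncodingT4() yields +0x10 for a t2B,
+// the target offered to tryAddingSymbolicOperand() above is
+// 0x10 + 0x8000 + 4 = 0x8014, since the offset is relative to the Thumb PC
+// (current instruction address + 4).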
+
+static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case ARM::t2PLDi12: case ARM::t2PLDi8:
+ case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIi12: case ARM::t2PLIi8:
+ case ARM::t2PLIs:
+ return true;
+ }
+}
+
+static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
+ // t2PLDr: Rn Rm
+ // t2PLDs: Rn Rm imm2=Inst{5-4}
+ // Same pattern applies for t2PLDW* and t2PLI*.
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ "Expect >= 2 operands and first one as reg operand");
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+
+ if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+ } else {
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+ int Offset = 0;
+ if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
+ Opcode == ARM::t2PLIi8) {
+ // A8.6.117 Encoding T2: add = FALSE
+ unsigned Imm8 = getImm8(insn);
+ Offset = -1 * Imm8;
+ } else {
+ // The i12 forms. See, for example, A8.6.117 Encoding T1.
+ // Note that currently t2PLDi12 also handles the previously named t2PLDpci
+ // opcode, that's why we use decodeImm12(insn) which returns +/- imm12.
+ Offset = decodeImm12(insn);
+ }
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
+ !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
+ ++OpIdx;
+ }
+
+ return true;
+}
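+
+// Illustrative immediate (encoding values assumed): for t2PLDi8 with
+// imm8 = 4, the offset operand added above is -4, because encoding T2 is
+// subtract-only; the i12 forms instead rely on decodeImm12(insn) to carry
+// the sign.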
+
+static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
+ unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
+
+ // Inst{22-21} encodes the data item transferred for load/store.
+  // For single word, it is encoded as 0b10.
+ bool Word = (slice(insn, 22, 21) == 2);
+ bool Half = (slice(insn, 22, 21) == 1);
+ bool Byte = (slice(insn, 22, 21) == 0);
+
+ if (UseRm && BadReg(R2)) {
+ DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
+ return true;
+ }
+
+ if (Load) {
+ if (!Word && R0 == 13) {
+ DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Byte) {
+ if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ // A6.3.8 Load halfword, memory hints
+ if (Half) {
+ if (WB) {
+ if (R0 == R1) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if (R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
+ if (R0 == 15 && slice(insn, 10, 8) == 4) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ } else {
+ if (R0 == 15) {
+ // A8.6.82 LDRSH (immediate) Encoding T1
+ DEBUG(errs() << "if Rt == '1111' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ }
+ }
+ }
+ } else {
+ if (WB && R0 == R1) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if ((WB && R0 == 15) || (!WB && R1 == 15)) {
+ DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
+ return true;
+ }
+ if (Word) {
+ if ((WB && R1 == 15) || (!WB && R0 == 15)) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
+ DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// A6.3.10 Store single data item
+// A6.3.9 Load byte, memory hints
+// A6.3.8 Load halfword, memory hints
+// A6.3.7 Load word
+//
+// For example,
+//
+// t2LDRi12: Rd Rn (+)imm12
+// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// t2STRi12: Rd Rn (+)imm12
+// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
+// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
+//
+// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated
+// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first
+// operand is Rn; for all the other instructions, Rd is the first operand.
+//
+// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
+// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
+static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ unsigned Rn = decodeRn(insn);
+
+ if (Thumb2PreloadOpcode(Opcode))
+ return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+
+ // See, for example, A6.3.7 Load word: Table A6-18 Load word.
+ if (Load && Rn == 15)
+ return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
+ "Expect >= 3 operands and first two as reg operands");
+
+ bool ThreeReg = (OpInfo[2].RegClass > 0);
+ bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
+ bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
+
+ // Build the register operands, followed by the immediate.
+ unsigned R0 = 0, R1 = 0, R2 = 0;
+ unsigned Rd = decodeRd(insn);
+ int Imm = 0;
+
+ if (!Load && TIED_TO) {
+ R0 = Rn;
+ R1 = Rd;
+ } else {
+ R0 = Rd;
+ R1 = Rn;
+ }
+ if (ThreeReg) {
+ if (TIED_TO) {
+ R2 = Rn;
+ Imm = decodeImm8(insn);
+ } else {
+ R2 = decodeRm(insn);
+ // See, for example, A8.6.64 LDRB (register).
+ // And ARMAsmPrinter::printT2AddrModeSoRegOperand().
+ // LSL is the default shift opc, and LLVM does not expect it to be encoded
+ // as part of the immediate operand.
+ // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4));
+ Imm = slice(insn, 5, 4);
+ }
+ } else {
+ if (Imm12)
+ Imm = getImm12(insn);
+ else
+ Imm = decodeImm8(insn);
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ R0)));
+ ++OpIdx;
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ R1)));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ // This could be an offset register or a TIED_TO register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ R2)));
+ ++OpIdx;
+ }
+
+  if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg && !TIED_TO,
+                        TIED_TO))
+ return false;
+
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
+ && !OpInfo[OpIdx].isOptionalDef()
+ && "Pure imm operand expected");
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ ++OpIdx;
+
+ return true;
+}
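+
+// Illustrative operand ordering (registers made up): for a pre-indexed store
+// such as "t2STR_PRE r1, [r2, #-4]!", Load is false and TIED_TO is true, so
+// the operands are built as R0 = Rn (r2, the writeback def), R1 = Rd (r1, the
+// stored value), R2 = Rn again (tied to the def), then the imm8 offset --
+// matching the t2STR_PRE line in the comment block above.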
+
+// A6.3.12 Data-processing (register)
+//
+// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))]
+// Three register operands only: Rs Rn Rm
+// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))]
+//
+// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm
+//
+// Miscellaneous operations: Rs [Rn] Rm
+static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
+ unsigned &OpIdx = NumOpsAdded;
+
+ OpIdx = 0;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
+ "Expect >= 2 operands and first two as reg operands");
+
+ // Build the register operands, followed by the optional rotation amount.
+
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRs(insn))));
+ ++OpIdx;
+
+ if (ThreeReg) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ decodeRm(insn))));
+ ++OpIdx;
+
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
+ && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+ // Add the rotation amount immediate.
+ MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
+ ++OpIdx;
+ }
+
+ return true;
+}
+
+// A6.3.16 Multiply, multiply accumulate, and absolute difference
+//
+// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12}
+// t2MUL, t2SMMUL: Rs Rn Rm
+// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12}
+// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm
+//
+// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]:
+// Rs Rn Rm Ra=Inst{15-12}
+//
+// Unsigned Sum of Absolute Differences [and Accumulate]
+// Rs Rn Rm [Ra=Inst{15-12}]
+static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ // Build the register operands.
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ NumOpsAdded = FourReg ? 4 : 3;
+
+ return true;
+}
+
+// A6.3.17 Long multiply, long multiply accumulate, and divide
+//
+// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Halfword multiply accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Dual halfword multiply: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
+// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
+//
+// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
+static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+
+ assert(NumOps >= 3 &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
+ "Expect >= 3 operands and first three as reg operands");
+
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
+
+ // Build the register operands.
+
+ if (FourReg)
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRm(insn))));
+
+ if (FourReg)
+ NumOpsAdded = 4;
+ else
+ NumOpsAdded = 3;
+
+ return true;
+}
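+
+// Illustrative ordering (registers made up): for "t2UMULL r0, r1, r2, r3" the
+// four operands added above are RdLo = Inst{15-12} = r0, RdHi = Inst{11-8} =
+// r1, then Rn = r2 and Rm = r3, in that order.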
+
+// See A6.3 32-bit Thumb instruction encoding for instruction classes
+// corresponding to (op1, op2, op).
+//
+// Table A6-9 32-bit Thumb instruction encoding
+// op1 op2 op Instruction class, see
+// --- ------- -- -----------------------------------------------------------
+// 01 00xx0xx - Load/store multiple on page A6-23
+// 00xx1xx - Load/store dual, load/store exclusive, table branch on
+// page A6-24
+// 01xxxxx - Data-processing (shifted register) on page A6-31
+// 1xxxxxx - Coprocessor instructions on page A6-40
+// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
+// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
+// - 1 Branches and miscellaneous control on page A6-20
+// 11 000xxx0 - Store single data item on page A6-30
+// 001xxx0 - Advanced SIMD element or structure load/store instructions
+// on page A7-27
+// 00xx001 - Load byte, memory hints on page A6-28
+// 00xx011 - Load halfword, memory hints on page A6-26
+// 00xx101 - Load word on page A6-25
+// 00xx111 - UNDEFINED
+// 010xxxx - Data-processing (register) on page A6-33
+// 0110xxx - Multiply, multiply accumulate, and absolute difference on
+// page A6-38
+// 0111xxx - Long multiply, long multiply accumulate, and divide on
+// page A6-39
+// 1xxxxxx - Coprocessor instructions on page A6-40
+//
+static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
+ MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+
+ switch (op1) {
+ case 1:
+ if (slice(op2, 6, 5) == 0) {
+ if (slice(op2, 2, 2) == 0) {
+ // Load/store multiple.
+ return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+
+      // Otherwise: load/store dual, load/store exclusive, or table branch.
+ assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
+ if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
+ (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
+ // Load/store exclusive.
+ return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2LDRDi8 ||
+ Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
+ Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
+ // Load/store dual.
+ return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
+ // Table branch.
+ return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+ } else if (slice(op2, 6, 5) == 1) {
+ // Data-processing (shifted register).
+ return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ }
+
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+
+ break;
+ case 2:
+ if (op == 0) {
+ if (slice(op2, 5, 5) == 0)
+ // Data-processing (modified immediate)
+ return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
+
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ case 3:
+ switch (slice(op2, 6, 5)) {
+ case 0:
+ // Load/store instructions...
+ if (slice(op2, 0, 0) == 0) {
+ if (slice(op2, 4, 4) == 0) {
+ // Store single data item on page A6-30
+ return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
+ B);
+ } else {
+ // FIXME: Advanced SIMD element or structure load/store instructions.
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ }
+ } else {
+ // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
+ return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+ NumOpsAdded, B);
+ }
+ break;
+ case 1:
+ if (slice(op2, 4, 4) == 0) {
+ // A6.3.12 Data-processing (register)
+ return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else if (slice(op2, 3, 3) == 0) {
+ // A6.3.16 Multiply, multiply accumulate, and absolute difference
+ return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
+ } else {
+ // A6.3.17 Long multiply, long multiply accumulate, and divide
+ return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ break;
+ default:
+ // FIXME: A6.3.18 Coprocessor instructions
+ // But see ThumbDisassembler::getInstruction().
+ ;
+ break;
+ }
+
+ break;
+ default:
+ assert(0 && "Thumb2 encoding error!");
+ break;
+ }
+
+ return false;
+}
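+
+// Illustrative dispatch (encoding value chosen as an example): for a DMB SY,
+// insn = 0xf3bf8f5f, DisassembleThumbFrm() below extracts op1 = 0b10,
+// op2 = 0x3b and op = 1 from the halfwords 0xf3bf / 0x8f5f; with op1 = 2 and
+// op = 1 the switch above routes to DisassembleThumb2BrMiscCtrl(), matching
+// the "op1 = 10, op = 1" row of Table A6-9.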
+
+static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+
+ uint16_t HalfWord = slice(insn, 31, 16);
+
+ if (HalfWord == 0) {
+ // A6.2 16-bit Thumb instruction encoding
+ // op = bits[15:10]
+ uint16_t op = slice(insn, 15, 10);
+ return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+ }
+
+ unsigned bits15_11 = slice(HalfWord, 15, 11);
+
+ // A6.1 Thumb instruction set encoding
+ if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+    assert(0 &&
+           "Bits[15:11] of first halfword of Thumb2 instruction out of range");
+ return false;
+ }
+
+ // A6.3 32-bit Thumb instruction encoding
+
+ uint16_t op1 = slice(HalfWord, 12, 11);
+ uint16_t op2 = slice(HalfWord, 10, 4);
+ uint16_t op = slice(insn, 15, 15);
+
+ return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
+ Builder);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 000000000000..78d3e477975c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,759 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
+#include "ARMAddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "ARMGenAsmWriter.inc"
+
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ unsigned Opcode = MI->getOpcode();
+
+ // Check for MOVs and print canonical forms, instead.
+ if (Opcode == ARM::MOVs) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+ const MCOperand &MO3 = MI->getOperand(3);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+ printSBitModifierOperand(MI, 6, O);
+ printPredicateOperand(MI, 4, O);
+
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+ return;
+
+ O << ", ";
+
+ if (MO2.getReg()) {
+ O << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+ return;
+ }
+
+ // A8.6.123 PUSH
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.122 POP
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.355 VPUSH
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ // A8.6.354 VPOP
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+ MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
+ }
+
+ printInstruction(MI, O);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+// so_reg is a 3-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+ if (MO2.getReg()) {
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+  } else {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (!MO2.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM2PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (MO2.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << '[' << getRegisterName(MO1.getReg());
+
+ if (MO2.getReg()) {
+ O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg()) << ']';
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+ O << ']';
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM3PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << getRegisterName(MO1.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ImmOffs * 4;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << (MO2.getImm() << 3);
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << "[" << getRegisterName(MO1.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getReg() == 0)
+ O << "!";
+ else
+ O << ", " << getRegisterName(MO.getReg());
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+  uint32_t v = ~MO.getImm();
+  int32_t lsb = CountTrailingZeros_32(v);
+  int32_t width = (32 - CountLeadingZeros_32(v)) - lsb;
+  O << '#' << lsb << ", #" << width;
+}
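+
+// Illustrative decode (mask value made up): a bf_inv_mask_imm of 0xfff00fff
+// describes a BFC/BFI field at bits 19:12, so v = ~imm = 0x000ff000,
+// lsb = 12, width = (32 - 12) - 12 = 8, and "#12, #8" is printed.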
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != OpNum) O << ", ";
+ O << getRegisterName(MI->getOperand(i).getReg());
+ }
+ O << "}";
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (SpecRegRBit)
+ O << "spsr";
+ else
+ O << "cpsr";
+
+ if (Mask) {
+ O << '_';
+ if (Mask & 8) O << 'f';
+ if (Mask & 4) O << 's';
+ if (Mask & 2) O << 'x';
+ if (Mask & 1) O << 'c';
+ }
+}
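+
+// Illustrative print (operand value made up): an MSR mask operand of 0x19 has
+// the R bit set (0x19 >> 4 == 1) and Mask == 0x9, so "spsr_fc" is printed
+// (8 -> 'f', 1 -> 'c').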
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNum).getReg()) {
+ assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+ "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
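+
+// Illustrative print (operand value made up): for an IT-mask operand of 0x0c,
+// CondBit0 = 0 and NumTZ = 2, so only Pos = 3 is examined; that bit is 1,
+// which differs from CondBit0, so 'e' is printed and the block reads "ITE".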
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned RegNum = MO2.getReg())
+ O << ", " << getRegisterName(RegNum);
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
+ O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// t2_so_reg (constant shifts) is a 2-operand unit corresponding to the Thumb2
+// register-with-shift forms:
+// REG 0 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ O << getRegisterName(Reg);
+
+ // Print the shift opc.
+ assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << ", #-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << ", #" << OffImm * 4;
+ O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm();
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else if (OffImm > 0)
+ O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ // Don't print +0.
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+ O << "[" << getRegisterName(MO1.getReg());
+
+ assert(MO2.getReg() && "Invalid so_reg load / store address!");
+ O << ", " << getRegisterName(MO2.getReg());
+
+ unsigned ShAmt = MO3.getImm();
+ if (ShAmt) {
+ assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+ O << ", lsl #" << ShAmt;
+ }
+ O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << (float)MO.getFPImm();
+ } else {
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.F;
+ }
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << MO.getFPImm();
+ } else {
+ // We expect the binary encoding of a floating point number here.
+ union {
+ uint64_t I;
+ double D;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.D;
+ }
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 000000000000..d5f238bb8a61
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,120 @@
+//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+ ARMInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..18645c0864a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+ ARMInstPrinter.cpp
+ )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 000000000000..65d372e44b88
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
new file mode 100644
index 000000000000..53b4c95d3801
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -0,0 +1,78 @@
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables"),
+ cl::init(false));
+
+
+static const char *const arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+ AsmTransCBE = arm_asm_table;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::SjLj;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ Data64bitsDirective = 0;
+ CommentString = "@";
+
+ HasLEB128 = true;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ HasLCOMMDirective = true;
+
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ if (EnableARMEHABI)
+ ExceptionsType = ExceptionHandling::ARM;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
new file mode 100644
index 000000000000..90f7822ea580
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit ARMMCAsmInfoDarwin();
+ };
+
+ struct ARMELFMCAsmInfo : public MCAsmInfo {
+ explicit ARMELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 000000000000..f8fcf2b8aff1
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,144 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "ARMMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT) {
+  // Work out the subtarget feature string implied by the target triple. The
+  // result is empty (apart from "+thumb-mode" for Thumb triples) when the
+  // architecture version cannot be determined.
+ unsigned Len = TT.size();
+ unsigned Idx = 0;
+
+  // FIXME: Enhance Triple helper class to extract ARM version.
+ bool isThumb = false;
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ isThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+
+ std::string ARMArchFeature;
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+ } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk";
+ } else
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+ } else if (SubVer == '6') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = "+v6t2";
+ else
+ ARMArchFeature = "+v6";
+ } else if (SubVer == '5') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = "+v5te";
+ else
+ ARMArchFeature = "+v5t";
+ } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchFeature = "+v4t";
+ }
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
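+ // Append any explicitly requested features after the ones derived from the
+ // triple; features later in the string win when they conflict, so an
+ // explicit "-neon", for instance, overrides a triple-derived "+neon".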
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return new ARMMCAsmInfoDarwin();
+
+ return new ARMELFMCAsmInfo();
+}
+
+extern "C" void LLVMInitializeARMMCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 000000000000..74701e3516dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+ std::string ParseARMTriple(StringRef TT);
+
+ /// createARMMCSubtargetInfo - Create an ARM MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..68daf42c9191
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMARMDesc
+ ARMMCTargetDesc.cpp
+ ARMMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..448ed9df2bff
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 000000000000..2df00538b39f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,331 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each into a pair
+// of multiply and add / sub instructions) when special VMLx hazards are
+// detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExpand("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
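+ // The pass scans each basic block bottom-up. LastMIs is a 4-entry circular
+ // buffer of recently visited instructions (MIIdx is the next write slot);
+ // IgnoreStall marks MLx instructions that no longer need expanding because
+ // a dependent MLx has already been broken up.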
+ bool isA9;
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ // FIXME: Detect integer instructions properly.
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
+ return false;
+ unsigned Opcode = MCID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
+ return false;
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+ if (ForceExpand)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
+ return true;
+ }
+
+ if (IgnoreStall.count(MI))
+ return false;
+
+ // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
+ // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+ // preserves the in-order retirement of the instructions.
+ // Look at the next few instructions; if *most* of them can cause hazards,
+ // the scheduler can't *fix* this, so we'd better break up the VMLA.
+ unsigned Limit1 = isA9 ? 1 : 4;
+ unsigned Limit2 = isA9 ? 1 : 4;
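+ // Heuristic: on Cortex-A9 only the instruction immediately following the
+ // MLx is considered; on other cores the window is four instructions.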
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
+
+ // Look for VMLx RAW hazard.
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
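+ // TmpReg receives the product of the standalone multiply; the add / sub
+ // built below then combines it with the original accumulator.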
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+ IgnoreStall.clear();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+ // Assume dual issues of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ isA9 = STI->isCortexA9();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
new file mode 100644
index 000000000000..c85d1e99705a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
@@ -0,0 +1,149 @@
+//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-mov-fix"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumVMovs, "Number of reg-reg moves converted");
+
+namespace {
+ struct NEONMoveFixPass : public MachineFunctionPass {
+ static char ID;
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NEON reg-reg move conversion";
+ }
+
+ private:
+ const TargetRegisterInfo *TRI;
+ const ARMBaseInstrInfo *TII;
+ bool isA8;
+
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ bool InsertMoves(MachineBasicBlock &MBB);
+ };
+ char NEONMoveFixPass::ID = 0;
+}
+
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+ return (Domain & ARMII::DomainNEON) ||
+ (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
+bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
+ RegMap Defs;
+ bool Modified = false;
+
+ // Walk over MBB tracking the def points of the registers.
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+ MachineInstr *MI = &*MII;
+
+ if (MI->getOpcode() == ARM::VMOVD &&
+ !TII->isPredicated(MI)) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ // If we do not find an instruction defining the reg, this means the
+ // register should be live-in for this BB. It's always better to use
+ // NEON reg-reg moves.
+ unsigned Domain = ARMII::DomainNEON;
+ RegMap::iterator DefMI = Defs.find(SrcReg);
+ if (DefMI != Defs.end()) {
+ Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
+ // Instructions in general domain are subreg accesses.
+ // Map them to NEON reg-reg moves.
+ if (Domain == ARMII::DomainGeneral)
+ Domain = ARMII::DomainNEON;
+ }
+
+ if (inNEONDomain(Domain, isA8)) {
+ // Convert VMOVD to VORRd
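+ // (VORR Dd, Dm, Dm copies Dm into Dd entirely inside the NEON domain,
+ // avoiding a cross-domain VFP move.)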
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ DEBUG({errs() << "vmov convert: "; MI->dump();});
+
+ // It's safe to ignore imp-defs / imp-uses here, since:
+ // - We're running late, no intelligent codegen passes should be run
+ // afterwards
+ // - The imp-defs / imp-uses are superregs only, we don't care about
+ // them.
+ AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VORRd), DestReg)
+ .addReg(SrcReg).addReg(SrcReg));
+ MBB.erase(MI);
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+
+ DEBUG({errs() << " into: "; MI->dump();});
+
+ Modified = true;
+ ++NumVMovs;
+ } else {
+ assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
+ // Do nothing.
+ }
+ }
+
+ // Update def information.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+
+ Defs[MOReg] = MI;
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
+ Defs[*R] = MI;
+ }
+ }
+
+ return Modified;
+}
+
+bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
+ ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
+ const TargetMachine &TM = Fn.getTarget();
+
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+
+ TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= InsertMoves(MBB);
+ }
+
+ return Modified;
+}
+
+/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
+/// pass.
+FunctionPass *llvm::createNEONMoveFixPass() {
+ return new NEONMoveFixPass();
+}
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
new file mode 100644
index 000000000000..163a0a987584
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheARMTarget, llvm::TheThumbTarget;
+
+extern "C" void LLVMInitializeARMTargetInfo() {
+ RegisterTarget<Triple::arm, /*HasJIT=*/true>
+ X(TheARMTarget, "arm", "ARM");
+
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true>
+ Y(TheThumbTarget, "thumb", "Thumb");
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 000000000000..c258870e48a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,361 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets to
+ // address the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
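+ // The sp-relative Thumb1 load/store encodings take an 8-bit word offset,
+ // so they reach at most 255 * 4 = 1020 bytes; only reserve the call frame
+ // when it stays within half of that range.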
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
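+ // Area 1 holds the low GPRs (r4-r7) and LR; area 2 holds r8-r11 (split out
+ // only on Darwin); the DPR area holds any spilled D registers.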
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ MachineInstr::FrameSetup);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+ // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ if (MI->getOpcode() == ARM::tLDRspi &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
+ } else
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill));
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
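+ // Popping the saved LR directly into PC turns this restore into the
+ // function return; the original return instruction is erased below.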
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+ // It's illegal to emit pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 000000000000..bcfc5165fad0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1FRAMELOWERING_H
+#define THUMB1FRAMELOWERING_H
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 000000000000..218311d78d30
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,100 @@
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1InstrInfo.h"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "Thumb1InstrInfo.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ return 0;
+}
+
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+ "Thumb1 can only copy GPR registers");
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert((RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) && "Unknown regclass!");
+
+ if (RC == ARM::tGPRRegisterClass ||
+ (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
+ isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 000000000000..17ef2f758ef4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,59 @@
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+ Thumb1RegisterInfo RI;
+public:
+ explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 000000000000..4eb0b6c93e1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,714 @@
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+extern cl::opt<bool> ReuseFrameIndexVals;
+}
+
+using namespace llvm;
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ if (ARM::tGPRRegClass.hasSubClassEq(RC))
+ return ARM::tGPRRegisterClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::tGPRRegisterClass;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
+}
+
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have a high register version. Load the negative value
+ // if either the base or dest register is a high register. Also, do not
+ // issue the sub as part of the sequence if the condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specified add / sub r, c instruction.
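+/// For example (illustrative): adding 1100 to sp via tADDrSPi (8-bit offset
+/// scaled by 4, i.e. chunks of 1020) takes one tADDrSPi plus one tADDi8 for
+/// the remaining 80 bytes, so calcNumMI returns 2.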
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+ bool NeedCC = false;
+ bool NeedPred = false;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ if (DestReg == ARM::SP) {
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ } else {
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NumBits = 8;
+ NeedPred = NeedCC = true;
+ }
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MachineInstrBuilder MIB =
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
+ AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill))
+ .setMIFlags(MIFlags);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(DestReg).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+ }
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (NeedCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ if (NeedPred)
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
+
+ BaseReg = DestReg;
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ NeedPred = NeedCC = isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc) {
+ const MCInstrDesc &MCID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3)
+ .setMIFlags(MIFlags));
+ }
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
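+/// For example (illustrative), materializing -300 emits:
+///   movs rD, #255 ; adds rD, #45 ; rsbs rD, rD, #0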
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const Thumb1RegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
+ DestReg))
+ .addImm(ThisVal));
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
+ if (isSub) {
+ const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
+ .addReg(DestReg, RegState::Kill));
+ }
+}
+
+static void removeOperands(MachineInstr &MI, unsigned i) {
+ unsigned Op = i;
+ for (unsigned e = MI.getNumOperands(); i != e; ++i)
+ MI.RemoveOperand(Op);
+}
+
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+ if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setDesc(TII.get(Opcode));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and add predicate operands.
+ MI.RemoveOperand(FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
+ .addImm(Offset / Scale));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
+ }
+ return true;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
+ return true;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ if (Opcode == ARM::tADDi3) {
+ removeOperands(MI, FrameRegIdx);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
+ } else {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
+ }
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = llvm::next(II);
+ emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+ *this);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
+ if (Opcode == ARM::tADDi3) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+ }
+ return true;
+ } else {
+ if (AddrMode != ARMII::AddrModeT1_s)
+ llvm_unreachable("Unsupported addressing mode!");
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
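+ // sp-relative loads/stores encode an 8-bit word offset (0-1020 bytes),
+ // while the register-base forms only encode a 5-bit word offset (0-124).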
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with the frame register (e.g., sp).
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we ended up with a base register other than SP (e.g. the frame
+ // pointer), convert the instruction to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
+ return true;
+ }
+
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ ImmOp.ChangeToImmediate(0);
+ } else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask * Scale);
+ }
+ }
+
+ return Offset == 0;
+}
+
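A note on the arithmetic above: the tADDrSPi path folds the offset only when it is a multiple of the scale and the scaled value fits the immediate field; anything larger is split into the largest encodable chunk plus a remainder. A minimal standalone sketch of that test (the helper name and sample values are illustrative, not part of the patch):

    static bool fitsThumb1AddImm(int Offset, unsigned NumBits, unsigned Scale) {
      // Mirrors the "small offset" test in rewriteFrameIndex: Offset must be a
      // non-negative multiple of Scale whose scaled value fits in NumBits.
      if (Offset < 0 || (Offset % Scale) != 0)
        return false;
      unsigned Mask = (1u << NumBits) - 1;             // 0xff for the SP form
      return (((unsigned)Offset / Scale) & ~Mask) == 0;
    }

    // For offsets that do not fit, the positive path above emits the largest
    // encodable chunk (Mask * Scale, i.e. 255 * 4 = 1020 for "add rd, sp, #imm")
    // and leaves the remainder to emitThumbRegPlusImmediate:
    //   fitsThumb1AddImm(1020, 8, 4) -> true
    //   fitsThumb1AddImm(1100, 8, 4) -> false; split as 1020 + 80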
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function), the offset will be negative. Use R12 instead, since
+ // that's a call-clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Remove predicate first.
+ int PIdx = MI.findFirstPredOperandIdx();
+ if (PIdx != -1)
+ removeOperands(MI, PIdx);
+
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tLDRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
+ UseRR = true;
+ }
+ } else {
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
+ }
+
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else if (Desc.mayStore()) {
+ VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
+ bool UseRR = false;
+
+ if (Opcode == ARM::tSTRspi) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
+ else {
+ emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
+ MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else {
+ assert(false && "Unexpected opcode!");
+ }
+
+ // Add predicate back if it's needed.
+ if (MI.getDesc().isPredicable()) {
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ }
+}
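When the folded offset still does not fit, eliminateFrameIndex above materializes SP/FP plus the large immediate into a scratch register. A hedged sketch of how that scratch register is chosen (free-standing for illustration only; the real logic is inline in the function above):

    static unsigned pickScratchReg(MachineInstr &MI, MachineFunction &MF) {
      // Loads can reuse (and clobber) their own destination register; stores
      // need a fresh low virtual register for the address computation.
      if (MI.getDesc().mayLoad())
        return MI.getOperand(0).getReg();
      return MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
    }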
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 000000000000..9060e59e5980
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,69 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+
+ /// Code Generation virtual methods...
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
new file mode 100644
index 000000000000..360ec009e201
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -0,0 +1,252 @@
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "thumb2-it"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+
+namespace {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
+ static char ID;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Thumb IT blocks insertion pass";
+ }
+
+ private:
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
+ };
+ char Thumb2ITBlockPass::ID = 0;
+}
+
+/// TrackDefUses - Track which registers are defined and used by instructions
+/// in the IT block. This also tracks "dependencies", i.e. uses in the IT
+/// block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ if (!isCopy(MI))
+ return false;
+ // llvm models selects as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects, we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ return false;
+}
+
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC == ARMCC::AL) {
+ ++MBBI;
+ continue;
+ }
+
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
+ // Insert an IT instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
+ .addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+ MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
+ ++MBBI;
+
+ // Form IT block.
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ unsigned Mask = 0, Pos = 3;
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block, so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
+ }
+
+ // Finalize IT mask.
+ Mask |= (1 << Pos);
+ // Tag along (firstcond[0] << 4) with the mask.
+ Mask |= (CC & 1) << 4;
+ MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
+ Modified = true;
+ ++NumITs;
+ }
+
+ return Modified;
+}
+
+bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+
+ if (!AFI->isThumbFunction())
+ return false;
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
+ MachineBasicBlock &MBB = *MFI;
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
+ }
+
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
+ return Modified;
+}
+
+/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
+/// insertion pass.
+FunctionPass *llvm::createThumb2ITBlockPass() {
+ return new Thumb2ITBlockPass();
+}
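The mask built in InsertITInstructions packs the low bit of each trailing condition starting at bit 3, plants a terminator bit at the final position, and, as the in-line comment notes, tags firstcond[0] along in bit 4. A standalone sketch of just that arithmetic (names are illustrative; the sample relies on ARMCC::EQ == 0 and ARMCC::NE == 1):

    static unsigned buildITMask(ARMCC::CondCodes CC,
                                const SmallVectorImpl<ARMCC::CondCodes> &Trailing) {
      unsigned Mask = 0, Pos = 3;
      for (unsigned i = 0, e = Trailing.size(); i != e && Pos; ++i, --Pos)
        Mask |= (Trailing[i] & 1) << Pos;   // low bit of each trailing condition
      Mask |= (1 << Pos);                   // terminator bit
      Mask |= (CC & 1) << 4;                // firstcond[0] tagged along
      return Mask;
    }

    // buildITMask(ARMCC::EQ, {EQ, NE}) == 0x6, matching the two operands the
    // pass gives t2IT for an EQ block with one more "then" and one "else"
    // instruction.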
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
new file mode 100644
index 000000000000..51b56aaeb008
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -0,0 +1,610 @@
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb2InstrInfo.h"
+#include "ARM.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
+
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
+ // FIXME
+ return 0;
+}
+
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
+ }
+
+ // Control flow can reach here if branch folding is run before the IT
+ // block formation pass.
+ }
+}
+
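A hedged sketch of the mask fix-up performed just above when the tail of an IT block is replaced: the bits below bit Count are cleared and a new terminator bit is planted there, while the higher bits (including the bit-4 firstcond tag) are preserved. The helper and sample values are illustrative only.

    static unsigned shortenITMask(unsigned Mask, unsigned Count) {
      unsigned MaskOn  = 1u << Count;       // new terminator position
      unsigned MaskOff = ~(MaskOn - 1);     // wipe the now-unused low bits
      return (Mask & MaskOff) | MaskOn;
    }

    // shortenITMask(0x6 /*terminator at bit 1*/, 2) == 0x4: the IT block now
    // covers one fewer trailing instruction.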
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Handle SPR, DPR, and QPR copies.
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+}
+
+void Thumb2InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
+}
+
+void Thumb2InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ return;
+ }
+
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
+}
+
+void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ // If profitable, use a movw or movt to materialize the offset.
+ // FIXME: Use the scavenger to grab a scratch register.
+ if (DestReg != ARM::SP && DestReg != BaseReg &&
+ NumBytes >= 4096 &&
+ ARM_AM::getT2SOImmVal(NumBytes) == -1) {
+ bool Fits = false;
+ if (NumBytes < 65536) {
+ // Use a movw to materialize the 16-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
+ .addImm(NumBytes)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ } else if ((NumBytes & 0xffff) == 0) {
+ // Use a movt to materialize the 32-bit constant.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
+ .addReg(DestReg)
+ .addImm(NumBytes >> 16)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ Fits = true;
+ }
+
+ if (Fits) {
+ if (isSub) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addReg(DestReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ }
+ return;
+ }
+ }
+
+ while (NumBytes) {
+ unsigned ThisVal = NumBytes;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ bool HasCCOut = true;
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ } else {
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
+ }
+
+ // Build the new ADD / SUB.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)).setMIFlags(MIFlags);
+ if (HasCCOut)
+ AddDefaultCC(MIB);
+
+ BaseReg = DestReg;
+ }
+}
+
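Both the loop above and rewriteT2FrameIndex below peel off "8 adjacent bits" of a constant so that each chunk is encodable as a Thumb2 modified immediate. A standalone sketch of that step, using a compiler clz builtin in place of CountLeadingZeros_32/ARM_AM::rotr32 (assumption: the value is non-zero):

    static unsigned takeTopByteChunk(unsigned Value) {
      unsigned RotAmt = __builtin_clz(Value);          // leading zeros of Value
      // An 8-bit window anchored at Value's most significant set bit,
      // i.e. rotr32(0xff000000, RotAmt).
      unsigned Window = (0xff000000u >> RotAmt) |
                        (RotAmt ? 0xff000000u << (32 - RotAmt) : 0);
      return Value & Window;
    }

    // takeTopByteChunk(0x1234) == 0x1220; the remaining 0x14 is folded or
    // emitted by a follow-up add/sub, exactly as NumBytes &= ~ThisVal does
    // in the loop above.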
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi12: return ARM::t2LDRi8;
+ case ARM::t2LDRHi12: return ARM::t2LDRHi8;
+ case ARM::t2LDRBi12: return ARM::t2LDRBi8;
+ case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+ case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+ case ARM::t2STRi12: return ARM::t2STRi8;
+ case ARM::t2STRBi12: return ARM::t2STRBi8;
+ case ARM::t2STRHi12: return ARM::t2STRHi8;
+
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRi8: return ARM::t2LDRi12;
+ case ARM::t2LDRHi8: return ARM::t2LDRHi12;
+ case ARM::t2LDRBi8: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+ case ARM::t2STRi8: return ARM::t2STRi12;
+ case ARM::t2STRBi8: return ARM::t2STRBi12;
+ case ARM::t2STRHi8: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ case ARM::t2LDRs: return ARM::t2LDRi12;
+ case ARM::t2LDRHs: return ARM::t2LDRHi12;
+ case ARM::t2LDRBs: return ARM::t2LDRBi12;
+ case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+ case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+ case ARM::t2STRs: return ARM::t2STRi12;
+ case ARM::t2STRBs: return ARM::t2STRBi12;
+ case ARM::t2STRHs: return ARM::t2STRHi12;
+
+ case ARM::t2LDRi12:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSBi8:
+ case ARM::t2STRi8:
+ case ARM::t2STRBi8:
+ case ARM::t2STRHi8:
+ return opcode;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrModeT2_i12.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+
+ if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ unsigned PredReg;
+ if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVr));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ // Remove offset and remaining explicit predicate operands.
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
+ return true;
+ }
+
+ bool HasCCOut = Opcode != ARM::t2ADDri12;
+
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ Offset = 0;
+ return true;
+ }
+ // Another common case: imm12.
+ if (Offset < 4096 &&
+ (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ MI.setDesc(TII.get(NewOpc));
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+ // Remove the cc_out operand.
+ if (HasCCOut)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, extract 8 adjacent bits from the immediate into this
+ // t2ADDri/t2SUBri.
+ unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+ "Bit extraction didn't work?");
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ // Add cc_out operand if the original instruction did not have one.
+ if (!HasCCOut)
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ } else {
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return false;
+
+ // AddrModeT2_so cannot handle any offset. If there is no offset
+ // register then we change to an immediate version.
+ unsigned NewOpc = Opcode;
+ if (AddrMode == ARMII::AddrModeT2_so) {
+ unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+ if (OffsetReg != 0) {
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ return Offset == 0;
+ }
+
+ MI.RemoveOperand(FrameRegIdx+1);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+ NewOpc = immediateOffsetOpcode(Opcode);
+ AddrMode = ARMII::AddrModeT2_i12;
+ }
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ // The i8 form supports only negative offsets and the i12 form only
+ // positive ones, so convert Opcode to the appropriate instruction
+ // based on the sign of Offset.
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
+ if (Offset < 0) {
+ NewOpc = negativeOffsetOpcode(Opcode);
+ NumBits = 8;
+ isSub = true;
+ Offset = -Offset;
+ } else {
+ NewOpc = positiveOffsetOpcode(Opcode);
+ NumBits = 12;
+ }
+ } else if (AddrMode == ARMII::AddrMode5) {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
+ int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ Offset += InstrOffs * 4;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
+ } else {
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ if (NewOpc != Opcode)
+ MI.setDesc(TII.get(NewOpc));
+
+ MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+ // Attempt to fold address computation
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with fp/sp
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else
+ ImmedOffset = -ImmedOffset;
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, the offset doesn't fit. Pull in what we can to simplify the immediate.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode5)
+ // FIXME: Not consistent.
+ ImmedOffset |= 1 << NumBits;
+ else {
+ ImmedOffset = -ImmedOffset;
+ if (ImmedOffset == 0)
+ // Change the opcode back if the encoded offset is zero.
+ MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc)));
+ }
+ }
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ Offset = (isSub) ? -Offset : Offset;
+ return Offset == 0;
+}
+
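For the t2ADDri/t2SUBri path above, the rewrite tries three encodings in order before leaving a residual offset for the caller. A compact restatement as code (the enum and helper are illustrative; the cc_out caveat on the imm12 path is elided):

    enum T2AddOffsetFold { FoldSOImm, FoldImm12, FoldPartial };

    static T2AddOffsetFold classifyT2AddOffset(int Offset) {
      if (ARM_AM::getT2SOImmVal(Offset) != -1)
        return FoldSOImm;     // keep t2ADDri/t2SUBri, offset fully folded
      if (Offset < 4096)
        return FoldImm12;     // switch to t2ADDri12/t2SUBri12 (drops cc_out)
      return FoldPartial;     // fold the top 8-bit chunk, leave the rest in Offset
    }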
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 000000000000..f2637d7fbcab
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,78 @@
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+ Thumb2RegisterInfo RI;
+public:
+ explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+ // Return the non-pre/post-incrementing version of 'Opc'. Return 0
+ // if there is no such opcode.
+ unsigned getUnindexedOpcode(unsigned Opc) const;
+
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 000000000000..355c3bf0352c
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,52 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 000000000000..824378aeab4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,43 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseInstrInfo;
+ class Type;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+ Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
new file mode 100644
index 000000000000..c741a6e8a5b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -0,0 +1,881 @@
+//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "t2-reduce-size"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
+STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
+
+static cl::opt<int> ReduceLimit("t2-reduce-limit",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
+ cl::init(-1), cl::Hidden);
+static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
+ cl::init(-1), cl::Hidden);
+
+namespace {
+ /// ReduceTable - A static table with information on mapping from wide
+ /// opcodes to narrow ones.
+ struct ReduceEntry {
+ unsigned WideOpc; // Wide opcode
+ unsigned NarrowOpc1; // Narrow opcode to transform to
+ unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint8_t Imm1Limit; // Limit of immediate field (bits)
+ uint8_t Imm2Limit; // Limit of immediate field when it's two-address
+ unsigned LowRegs1 : 1; // Only possible if low-registers are used
+ unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
+ unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa.
+ // 1 - No cc field.
+ // 2 - Always set CPSR.
+ unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
+ unsigned Special : 1; // Needs to be dealt with specially
+ };
+
+ static const ReduceEntry ReduceTable[] = {
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR, but they are less
+ // likely to cause issues in a loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ };
+
+ class Thumb2SizeReduce : public MachineFunctionPass {
+ public:
+ static char ID;
+ Thumb2SizeReduce();
+
+ const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Thumb2 instruction size reduction pass";
+ }
+
+ private:
+ /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
+ DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
+ bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead);
+
+ bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry);
+
+ bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry, bool LiveCPSR,
+ MachineInstr *CPSRDef);
+
+ /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
+ /// instruction.
+ bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
+ /// non-two-address instruction.
+ bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef);
+
+ /// ReduceMBB - Reduce width of instructions in the specified basic block.
+ bool ReduceMBB(MachineBasicBlock &MBB);
+ };
+ char Thumb2SizeReduce::ID = 0;
+}
+
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+ unsigned FromOpc = ReduceTable[i].WideOpc;
+ if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ }
+}
+
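The constructor above builds ReduceOpcodeMap so the pass can go from a wide opcode straight to its table row when it later visits each instruction. A hedged sketch of that lookup (the helper is illustrative; the pass performs the equivalent lookup inline):

    static const ReduceEntry *lookupReduceEntry(
        const DenseMap<unsigned, unsigned> &ReduceOpcodeMap, unsigned WideOpc) {
      DenseMap<unsigned, unsigned>::const_iterator I = ReduceOpcodeMap.find(WideOpc);
      if (I == ReduceOpcodeMap.end())
        return 0;                        // no 16-bit counterpart for this opcode
      return &ReduceTable[I->second];    // NarrowOpc1 / NarrowOpc2, limits, flags
    }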
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ if (*Regs == ARM::CPSR)
+ return true;
+ return false;
+}
+
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the flag-setting ('s') 16-bit instructions partially update CPSR. Abort the
+/// transformation to avoid adding a false dependency on the last CPSR-setting
+/// instruction, which hurts the out-of-order execution engine's ability
+/// to do register renaming magic.
+/// This function checks if there is a read-after-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR-setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+/// = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// is only an indirect RAW dependency between the muls and the mul.w.
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+ if (!Def || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Def->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Use->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+ // No read-after-write dependency. The narrowing will add a false dependency.
+ return true;
+}
+
+bool
+Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
+ bool is2Addr, ARMCC::CondCodes Pred,
+ bool LiveCPSR, bool &HasCC, bool &CCDead) {
+ if ((is2Addr && Entry.PredCC2 == 0) ||
+ (!is2Addr && Entry.PredCC1 == 0)) {
+ if (Pred == ARMCC::AL) {
+ // Not predicated, must set CPSR.
+ if (!HasCC) {
+ // Original instruction was not setting CPSR, but CPSR is not
+ // currently live anyway. It's ok to set it. The CPSR def is
+ // dead though.
+ if (!LiveCPSR) {
+ HasCC = true;
+ CCDead = true;
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // Predicated, must not set CPSR.
+ if (HasCC)
+ return false;
+ }
+ } else if ((is2Addr && Entry.PredCC2 == 2) ||
+ (!is2Addr && Entry.PredCC1 == 2)) {
+ // Old opcode has an optional def of CPSR.
+ if (HasCC)
+ return true;
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
+ if (!HasImplicitCPSRDef(MI->getDesc()))
+ return false;
+ HasCC = true;
+ } else {
+ // 16-bit instruction does not set CPSR.
+ if (HasCC)
+ return false;
+ }
+
+ return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isSPOk = isPCOk || isLROk;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ if (isPCOk && Reg == ARM::PC)
+ continue;
+ if (isLROk && Reg == ARM::LR)
+ continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
+ if (!isARMLowRegister(Reg))
+ return false;
+ }
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry) {
+ if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+ return false;
+
+ unsigned Scale = 1;
+ bool HasImmOffset = false;
+ bool HasShift = false;
+ bool HasOffReg = true;
+ bool isLdStMul = false;
+ unsigned Opc = Entry.NarrowOpc1;
+ unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
+
+ switch (Entry.WideOpc) {
+ default:
+ llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+ case ARM::t2LDRi12:
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
+
+ Scale = 4;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRBi12:
+ case ARM::t2STRBi12:
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRHi12:
+ case ARM::t2STRHi12:
+ Scale = 2;
+ HasImmOffset = true;
+ HasOffReg = false;
+ break;
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRSHs:
+ case ARM::t2STRs:
+ case ARM::t2STRBs:
+ case ARM::t2STRHs:
+ HasShift = true;
+ OpNum = 4;
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
+ return false;
+
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+
+ if (!isOK)
+ return false;
+
+ OpNum = 0;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_RET: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg != ARM::SP)
+ return false;
+ Opc = Entry.NarrowOpc2; // tPOP_RET
+ OpNum = 2;
+ isLdStMul = true;
+ break;
+ }
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ OpNum = 0;
+
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP &&
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
+ Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
+ return false;
+ }
+
+ isLdStMul = true;
+ break;
+ }
+ }
+
+ unsigned OffsetReg = 0;
+ bool OffsetKill = false;
+ if (HasShift) {
+ OffsetReg = MI->getOperand(2).getReg();
+ OffsetKill = MI->getOperand(2).isKill();
+
+ if (MI->getOperand(3).getImm())
+ // Thumb1 addressing mode doesn't support shift.
+ return false;
+ }
+
+ unsigned OffsetImm = 0;
+ if (HasImmOffset) {
+ OffsetImm = MI->getOperand(2).getImm();
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
+ // Make sure the immediate field fits.
+ return false;
+ }
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ if (!isLdStMul) {
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
+ assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
+
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ }
+
+ // Transfer the rest of operands.
+ for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumLdSts;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+    // The immediate must be in range, the destination register must be a low
+    // register, the predicate must be "always", and the instruction must not
+    // set the condition flags.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+    DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+ }
+
+ if (Entry.LowRegs1 && !VerifyLowRegs(MI))
+ return false;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayLoad() || MCID.mayStore())
+ return ReduceLoadStore(MBB, MI, Entry);
+
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri:
+ case ARM::t2ADDSrr: {
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ switch (Opc) {
+ default: break;
+ case ARM::t2ADDSri: {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ // fallthrough
+ }
+ case ARM::t2ADDSrr:
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ break;
+ }
+ case ARM::t2RSBri:
+ case ARM::t2RSBSri:
+ if (MI->getOperand(2).getImm() == 0)
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2, 0, 0, 1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ }
+ return false;
+}
+
+bool
+Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+
+ if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
+ return false;
+
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
+ return false;
+ if (Entry.Imm2Limit) {
+ unsigned Imm = MI->getOperand(2).getImm();
+ unsigned Limit = (1 << Entry.Imm2Limit) - 1;
+ if (Imm > Limit)
+ return false;
+ } else {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
+ return false;
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
+ continue;
+ MIB.addOperand(MI->getOperand(i));
+ }
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++Num2Addrs;
+ return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
+ const ReduceEntry &Entry,
+ bool LiveCPSR, MachineInstr *CPSRDef) {
+ if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
+ return false;
+
+ unsigned Limit = ~0U;
+ if (Entry.Imm1Limit)
+ Limit = (1 << Entry.Imm1Limit) - 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::CPSR)
+ continue;
+ if (Entry.LowRegs1 && !isARMLowRegister(Reg))
+ return false;
+ } else if (MO.isImm() &&
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
+ return false;
+ }
+ }
+
+ // Check if it's possible / necessary to transfer the predicate.
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool SkipPred = false;
+ if (Pred != ARMCC::AL) {
+ if (!NewMCID.isPredicable())
+ // Can't transfer predicate, fail.
+ return false;
+ } else {
+ SkipPred = !NewMCID.isPredicable();
+ }
+
+ bool HasCC = false;
+ bool CCDead = false;
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
+ HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
+ if (HasCC && MI->getOperand(NumOps-1).isDead())
+ CCDead = true;
+ }
+ if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
+ return false;
+
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
+ // Add the 16-bit instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MIB.addOperand(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef()) {
+ if (HasCC)
+ AddDefaultT1CC(MIB, CCDead);
+ else
+ AddNoT1CC(MIB);
+ }
+
+ // Transfer the rest of operands.
+ unsigned NumOps = MCID.getNumOperands();
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
+ continue;
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri) && i == 2)
+ // Skip the zero immediate operand, it's now implicit.
+ continue;
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
+ if (SkipPred && isPred)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
+ }
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
+ AddDefaultPred(MIB);
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+}
+
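+/// UpdateCPSRDef - Update CPSR liveness after MI executes. DefCPSR is set if
+/// MI defines CPSR at all; the return value is true if CPSR is live afterwards
+/// (MI has a non-dead CPSR def or CPSR was already live).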
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
+ bool HasDef = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+
+ DefCPSR = true;
+ if (!MO.isDead())
+ HasDef = true;
+ }
+
+ return HasDef || LiveCPSR;
+}
+
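+/// UpdateCPSRUse - Update CPSR liveness for MI's uses: a kill of CPSR ends
+/// its live range.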
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ if (MO.getReg() != ARM::CPSR)
+ continue;
+ assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+ if (MO.isKill()) {
+ LiveCPSR = false;
+ break;
+ }
+ }
+
+ return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Yes, CPSR could be livein.
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
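+  // CPSRDef tracks the most recent CPSR-defining instruction seen in this
+  // block; it is used to avoid reductions that would add a false dependency
+  // on a partial flag update.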
+ MachineInstr *CPSRDef = 0;
+
+ MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMII;
+ for (; MII != E; MII = NextMII) {
+ NextMII = llvm::next(MII);
+
+ MachineInstr *MI = &*MII;
+ LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI != ReduceOpcodeMap.end()) {
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+ // Ignore "special" cases for now.
+ if (Entry.Special) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ goto ProcessNext;
+ }
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ Modified = true;
+ MachineBasicBlock::iterator I = prior(NextMII);
+ MI = &*I;
+ }
+ }
+
+ ProcessNext:
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->getDesc().isCall())
+ // Calls don't really set CPSR.
+ CPSRDef = 0;
+ else if (DefCPSR)
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
+ }
+
+ return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ STI = &TM.getSubtarget<ARMSubtarget>();
+
+ bool Modified = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Modified |= ReduceMBB(*I);
+ return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() {
+ return new Thumb2SizeReduce();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.h b/contrib/llvm/lib/Target/Alpha/Alpha.h
new file mode 100644
index 000000000000..6ffaf45f4ed1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,43 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include "MCTargetDesc/AlphaMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ namespace Alpha {
+ // These describe LDAx
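+    // LDA adds a 16-bit signed displacement; LDAH scales its displacement by
+    // IMM_MULT (65536), so the pair can materialize 32-bit offsets.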
+
+ static const int IMM_LOW = -32768;
+ static const int IMM_HIGH = 32767;
+ static const int IMM_MULT = 65536;
+ }
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.td b/contrib/llvm/lib/Target/Alpha/Alpha.td
new file mode 100644
index 000000000000..ae79c2e4b70e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,68 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+ "Enable CIX extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
new file mode 100644
index 000000000000..46ae286895a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaAsmPrinter : public AsmPrinter {
+
+ explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : AsmPrinter(tm, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ void EmitStartOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ assert(MO.getImm() < (1 << 30));
+ } else {
+ printOp(MO, O);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ return;
+
+ case MachineOperand::MO_Immediate:
+ assert(0 && "printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
+}
+
+void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
+ OutStreamer.EmitRawText(StringRef("\t.set noat"));
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() {
+ RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 000000000000..376811709536
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,66 @@
+//===-- AlphaBranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace pseudo COND_BRANCH_* instructions with their appropriate real
+// branches. This is a simplified version of the PPC Branch Selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
new file mode 100644
index 000000000000..bde8819f46e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaCallingConv.td
@@ -0,0 +1,38 @@
+//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the Alpha architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Alpha Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Alpha : CallingConv<[
+ // i64 is returned in register R0
+ // R1 is an llvm extension, I don't know what gcc does
+ CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
+
+ // f32 / f64 are returned in F0/F1
+ CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Alpha Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Alpha : CallingConv<[
+ // The first 6 arguments are passed in registers, whether integer or
+ // floating-point
+ CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+ [F16, F17, F18, F19, F20, F21]>>,
+
+ CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+ [R16, R17, R18, R19, R20, R21]>>,
+
+ // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
new file mode 100644
index 000000000000..690cd1da9c1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
@@ -0,0 +1,143 @@
+//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaFrameLowering.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
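+// Split an immediate into the 16-bit halves used by an LDAH / LDA pair:
+// getUpper16(l) * IMM_MULT + getLower16(l) == l, with the low half kept in the
+// signed range [IMM_LOW, IMM_HIGH]. For example, 100000 splits into an upper
+// half of 2 (2 * 65536 = 131072) and a lower half of -31072.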
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. For Alpha this is the case only when the function has
+// variable sized allocas.
+//
+bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
+ bool FP = hasFP(MF);
+
+  // Handle GP offset
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(MF.getFunction());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
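+  // A 16-bit displacement fits in a single LDA; larger frames need an
+  // LDAH / LDA pair built from the upper and lower 16-bit halves.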
+ if (NumBytes >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+
+ // Now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ // This must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
new file mode 100644
index 000000000000..ebd9e1bac190
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
@@ -0,0 +1,43 @@
+//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Alpha.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class AlphaSubtarget;
+
+class AlphaFrameLowering : public TargetFrameLowering {
+ const AlphaSubtarget &STI;
+  // FIXME: This should end up in MachineFunctionInfo, not here!
+ mutable int curgpdist;
+public:
+ explicit AlphaFrameLowering(const AlphaSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 000000000000..7b91fea54af4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,426 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+    /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
+ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
+ /// in checking mode. If LHS is null, we assume that the mask has already
+ /// been validated before.
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.getNode() == 0) {
+            // Otherwise, if the mask was previously validated, we know it's okay
+ // to zapnot this entire byte even though all the bits aren't set.
+ } else {
+            // Otherwise we don't know that it's okay to zapnot this entire
+            // byte. Only do this if we can prove that the missing bits are
+            // already zero, so the bytezap doesn't need to really zero them.
+ BitsToCheck |= ~Constant & (0xFF << 8*i);
+ }
+ }
+ }
+
+      // If there are missing bits in a byte (for example, X & 0xEF00), check to
+      // see if the missing bits (0x1000) are already known zero. If not, the
+      // zap isn't okay to do, as it won't clear all the required bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS,
+ APInt(LHS.getValueSizeInBits(),
+ BitsToCheck)))
+ return 0;
+
+ return Result;
+ }
+
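+    /// get_zapImm - Return the zapnot byte mask for x if every byte of x is
+    /// either all ones or all zeros; otherwise return 0. For example,
+    /// 0x00FF00FF yields mask 0x5 (bytes 0 and 2).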
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
+
+
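+    /// getNearPower2 - Return the power of two closest to x, preferring the
+    /// smaller one on a tie.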
+ static uint64_t getNearPower2(uint64_t x) {
+ if (!x) return 0;
+ unsigned at = CountLeadingZeros_64(x);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = 1ULL << (64 - at);
+ //cerr << x << ":" << complow << ":" << comphigh << "\n";
+ if (abs64(complow - x) <= abs64(comphigh - x))
+ return complow;
+ else
+ return comphigh;
+ }
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ public:
+ explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
+ : SelectionDAGISel(TM)
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const AlphaTargetMachine &getTargetMachine() {
+ return static_cast<const AlphaTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const AlphaInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *getGlobalRetAddr();
+ void SelectCALL(SDNode *Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// getGlobalRetAddr - Grab the return address.
+///
+SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
+ unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
+ return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(N);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+ case AlphaISD::GlobalRetAddr:
+ return getGlobalRetAddr();
+
+ case AlphaISD::DivCall: {
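+    // Integer divide / remainder is lowered to a runtime call: the routine
+    // address goes in R27, the two operands in R24 and R25, and the result is
+    // read back from R27 after the JSRs.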
+ SDValue Chain = CurDAG->getEntryNode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
+ SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
+ SDValue(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDValue Chain = N->getOperand(0);
+ return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (uval == 0) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Alpha::R31, MVT::i64);
+ ReplaceUses(SDValue(N, 0), Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(
+ Type::getInt64Ty(*CurDAG->getContext()), uval);
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ SDValue(getGlobalBaseReg(), 0));
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ EVT T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->getValueAPF().isPosZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if (CN->getValueAPF().isNegZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ report_fatal_error("Unhandled FP constant type");
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
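+      // Compute the comparison with CMPT* in an FP register (swapping or
+      // inverting operands where needed), move the result to an integer
+      // register with FTOIT, then normalize it to 0 / 1 by comparing it
+      // against R31 (always zero) with CMPULT.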
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDValue tmp1 = N->getOperand(rev?1:0);
+ SDValue tmp2 = N->getOperand(rev?0:1);
+ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getZExtValue();
+ uint64_t mval = MC->getZExtValue();
+      // If the result is a zap, let the auto-generated patterns handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+      // Given mask X and shift S, we want to see if there is any zap in the
+      // mask if we play around with the bottom S bits.
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ SDValue Z =
+ SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(N);
+}
+
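+/// SelectCALL - Select an AlphaISD::CALL node. GP-relative callees are reached
+/// with BSR after the GOT address is copied into R29; all other callees are
+/// copied into R27 (the procedure value register) and called through JSR.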
+void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
+  // TODO: add flag stuff to prevent nondeterministic breakage!
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDValue GOT = SDValue(getGlobalBaseReg(), 0);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Glue, Addr.getOperand(0),
+ Chain, InFlag), 0);
+ } else {
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Glue, Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ ReplaceUses(SDValue(N, 0), Chain);
+ ReplaceUses(SDValue(N, 1), InFlag);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into a
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 000000000000..de003fb4c65e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,956 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n i8 1
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::ADDC , MVT::i64, Expand);
+ setOperationAction(ISD::ADDE , MVT::i64, Expand);
+ setOperationAction(ISD::SUBC , MVT::i64, Expand);
+ setOperationAction(ISD::SUBE , MVT::i64, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Promote);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool and
+ // ExternalSymbols nodes into the appropriate instructions to
+ // materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ setMinFunctionAlignment(4);
+
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i64;
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
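+// LowerJumpTable - Materialize the address of a jump table as a GP-relative
+// high / low pair: GPRelHi computes the high part against the GOT and GPRelLo
+// adds the low part.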
+static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use variable size stack frame format
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
+
+#include "AlphaGenCallingConv.inc"
+
+SDValue
+AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Alpha target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers. The InFlag
+  // is necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue argt;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ SDValue ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
+ break;
+ }
+    } else { // Remaining arguments are passed on the stack.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
+
+      // Create the SelectionDAG nodes corresponding to a load from this
+      // parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+ InVals.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, copy all argument
+  // registers to the stack.
+ if (isVarArg) {
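+    // Spill the six integer and six FP argument registers to fixed frame
+    // slots (at offsets -8*(6-i) and -8*(12-i) respectively) so va_arg can
+    // walk them later.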
+ FuncInfo->setVarArgsOffset(Ins.size() * 8);
+ std::vector<SDValue> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
+ if (i == 0) FuncInfo->setVarArgsBase(FI);
+ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+
+ if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ }
+
+ return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
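+  // Copy the saved return address back into R26 for the return, then copy up
+  // to two return values into R0/R1 (integer) or F0/F1 (floating point).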
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc(), MVT::i64),
+ SDValue());
+ switch (Outs.size()) {
+ default:
+ llvm_unreachable("Do not know how to return this many arguments!");
+ case 0:
+ break;
+ //return SDValue(); // ret void is legal
+ case 1: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg;
+ if (ArgVT.isInteger())
+ ArgReg = Alpha::R0;
+ else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ OutVals[0], Copy.getValue(1));
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
+ break;
+ }
+ case 2: {
+ EVT ArgVT = Outs[0].VT;
+ unsigned ArgReg1, ArgReg2;
+ if (ArgVT.isInteger()) {
+ ArgReg1 = Alpha::R0;
+ ArgReg2 = Alpha::R1;
+ } else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg1 = Alpha::F0;
+ ArgReg2 = Alpha::F1;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ OutVals[0], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ OutVals[1], Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ MVT::Other, Copy, Copy.getValue(1));
+}
+
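+/// LowerVAARG - Compute the address of the next va_arg value.  The Alpha
+/// va_list is a (base pointer, 32-bit offset) pair; a floating-point argument
+/// whose offset still lies in the register save area is fetched 6*8 bytes
+/// below the integer slots, and the stored offset is advanced by 8.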
+void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
+ SDValue &DataPtr,
+ SelectionDAG &DAG) const {
+ Chain = N->getOperand(0);
+ SDValue VAListP = N->getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+ MachinePointerInfo(VAListS),
+ false, false, 0);
+ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
+ if (N->getValueType(0).isFloatingPoint())
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: break; // Don't custom lower most intrinsics.
+ case Intrinsic::alpha_umulh:
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+
+ case ISD::SRL_PARTS: {
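+    // Double-wide logical shift right: if the shift amount is 64 or more, the
+    // high half becomes zero and the low half is ShOpHi >> (ShAmt - 64);
+    // otherwise the halves are shifted separately and the bits that cross the
+    // boundary are OR'd into the low half.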
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(64, MVT::i64), ShAmt);
+ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
+ DAG.getConstant(0, MVT::i64), ISD::SETLE);
+ // if 64 - shAmt <= 0
+ SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
+ SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(0, MVT::i64), bm);
+ SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
+ // else
+ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
+ SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
+ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
+ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
+ // Merge
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // case ISD::SRA_PARTS:
+
+ // case ISD::SHL_PARTS:
+
+
+ case ISD::SINT_TO_FP: {
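+    // Bitcast the i64 into an FP register, then convert the quadword integer
+    // with cvtqt (to f64) or cvtqs (to f32).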
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDValue LD;
+ bool isDouble = Op.getValueType() == MVT::f64;
+ LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
+ SDValue src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+ // FIXME there isn't really any debug info here
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ GSDN->getOffset());
+ // FIXME there isn't really any debug info here
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasLocalLinkage()) {
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+    // Expand only in the constant-divisor case.
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.getNode(), DAG, NULL) :
+ BuildSDIV(Op.getNode(), DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (Op.getValueType().isInteger()) {
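+      // Divides by a constant are expanded via BuildSDIV/BuildUDIV; anything
+      // else becomes a call to the __divq/__divqu/__remq/__remqu support
+      // routines through the DivCall node.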
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ : BuildUDIV(Op.getNode(), DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDValue Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
+ SDValue Chain, DataPtr;
+ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
+
+ SDValue Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ MachinePointerInfo(), MVT::i32, false, false, 0);
+ else
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
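+    // A va_list is a (pointer, 32-bit offset) pair: copy the base pointer,
+    // then copy the offset word stored 8 bytes past each pointer.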
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestP = Op.getOperand(1);
+ SDValue SrcP = Op.getOperand(2);
+ const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+ MachinePointerInfo(SrcS),
+ false, false, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
+ MachinePointerInfo(DestS),
+ false, false, 0);
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, MachinePointerInfo(), MVT::i32, false, false, 0);
+ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+ MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ }
+ case ISD::VASTART: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAListP = Op.getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+    // va_start stores the frame address of VarArgsBase followed by the
+    // VarArgsOffset value.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
+ MachinePointerInfo(VAListS), false, false, 0);
+ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, dl,
+ DAG.getConstant(FuncInfo->getVarArgsOffset(),
+ MVT::i64),
+ SA2, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDValue();
+}
+
+void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ assert(N->getValueType(0) == MVT::i32 &&
+ N->getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ SDValue Chain, DataPtr;
+ LowerVAARG(N, Chain, DataPtr, DAG);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ Results.push_back(Res);
+ Results.push_back(SDValue(Res.getNode(), 1));
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AlphaTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'f':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
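+  // 'r' selects the general-purpose registers; 'f' selects the FP register
+  // class that matches the operand width (F8RC for f64, F4RC otherwise).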
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, Alpha::GPRCRegisterClass);
+ case 'f':
+ return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
+ std::make_pair(0U, Alpha::F4RCRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::CAS64 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::LAS64 ||
+ MI->getOpcode() == Alpha::SWAP32 ||
+ MI->getOpcode() == Alpha::SWAP64) &&
+ "Unexpected instr type to insert");
+
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::SWAP32;
+
+  // The load-locked/store-conditional expansions for these atomic ops all
+  // take the same form:
+  //   start:
+  //     ll
+  //     do stuff (maybe branch to exit)
+  //     sc
+  //     test sc and maybe branch back to start
+  //   exit:
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ sinkMBB->splice(sinkMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ F->insert(It, llscMBB);
+ F->insert(It, sinkMBB);
+
+ BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+ unsigned reg_res = MI->getOperand(0).getReg(),
+ reg_ptr = MI->getOperand(1).getReg(),
+ reg_v2 = MI->getOperand(2).getReg(),
+ reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ reg_res).addImm(0).addReg(reg_ptr);
+ switch (MI->getOpcode()) {
+ case Alpha::CAS32:
+ case Alpha::CAS64: {
+ unsigned reg_cmp
+ = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+ .addReg(reg_v2).addReg(reg_res);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+ break;
+ }
+ case Alpha::LAS32:
+ case Alpha::LAS64: {
+ BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+ .addReg(reg_res).addReg(reg_v2);
+ break;
+ }
+ case Alpha::SWAP32:
+ case Alpha::SWAP64: {
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(reg_v2).addReg(reg_v2);
+ break;
+ }
+ }
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+ .addReg(reg_store).addImm(0).addReg(reg_ptr);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_store).addMBB(llscMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(sinkMBB);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Alpha target isn't yet aware of offsets.
+ return false;
+}
+
+bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ // +0.0 F31
+ // +0.0f F31
+ // -0.0 -F31
+ // -0.0f -F31
+ return Imm.isZero() || Imm.isNegZero();
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 000000000000..13383f4430f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,142 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+      // These correspond to the identically named Alpha instructions.
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
+ GPRelHi, GPRelLo,
+
+      /// RelLit - Literal relocation of a global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DIVCALL - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ public:
+ explicit AlphaTargetLowering(TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// getSetCCResultType - Get the SETCC result ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ // Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ // Helpers for custom lowering.
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 000000000000..6f4ebf279643
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//3.3:
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
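+//Memory instruction format: Ra, Rb (base register) and a 16-bit displacement.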
+class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let canFoldAsLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OutOperandList = (outs GPRC:$RA);
+ let InOperandList = (ins);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs);
+ let InOperandList = (ins target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let Constraints = "$RFALSE = $RDEST";
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (outs);
+ let InOperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let Pattern = pattern;
+
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 000000000000..4dcec8f31750
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,383 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AlphaGenInstrInfo.inc"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ RI(*this) {
+}
+
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
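+  // Integer copies are emitted as BIS (OR) of the source with itself; FP
+  // copies use CPYSS/CPYST for the 32- and 64-bit register classes.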
+ if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ llvm_unreachable("Attempt to copy register that is not GPR or FPR");
+ }
+}
+
+void
+AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STT))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+void
+AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ llvm_unreachable("Unhandled register class");
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ llvm_unreachable("Unknown opcode");
+ }
+ return 0; // Not reached
+}
+
+// Branch analysis.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
+
+bool AlphaInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Alpha::R29);
+ RegInfo.addLiveIn(Alpha::R29);
+
+ AlphaFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+/// getGlobalRetAddr - Return a virtual register initialized with the
+/// global return address register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
+ AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
+ unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr();
+ if (GlobalRetAddr != 0)
+ return GlobalRetAddr;
+
+ // Insert the set of GlobalRetAddr into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalRetAddr).addReg(Alpha::R26);
+ RegInfo.addLiveIn(Alpha::R26);
+
+ AlphaFI->setGlobalRetAddr(GlobalRetAddr);
+ return GlobalRetAddr;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 000000000000..337a85cdf22d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,85 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AlphaGenInstrInfo.inc"
+
+namespace llvm {
+
+class AlphaInstrInfo : public AlphaGenInstrInfo {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// getGlobalBaseReg - Return a virtual register initialized with the
+  /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+  /// getGlobalRetAddr - Return a virtual register initialized with the
+  /// global return address register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalRetAddr(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 000000000000..b20171224e29
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1157 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x =
+ abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getZExtValue() + 1) ==
+ (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS) return 0;
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+ return build != 0;
+}]>;
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+ N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getZExtValue() -
+ getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs64((int64_t)N->getZExtValue() -
+ (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+ };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+ "; ADJUP $amt",
+ [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+ "; ADJDOWN $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
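+// Note: a cmov only overwrites $RDEST when its condition holds; the false
+// value comes in through the operand that OForm4 presumably ties to $RDEST.
+// That is why the generic selects above pass $src2 (the false value) first
+// and $src1 (the true value) second, inverting the sense where needed.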
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
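+// Each all_inst entry expands to four instructions: the L forms match the op
+// wrapped in intop (sext_inreg to i32), giving the canonically sign-extended
+// 32-bit result, while the Q forms are plain 64-bit; the r/i suffixes select
+// a register RB or an 8-bit zero-extended immediate.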
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Const cases since legalize does sub x, int -> add x, inv(int) + 1
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
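+// e.g. legalization turns "sub $x, 100" into "add $x, -100"; -100 does not
+// fit immUExt8, but its negation does, so the patterns above recover the
+// immediate subtract forms (SUBQi etc.) instead of materializing -100.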
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
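+// Note: the three patterns above model "shift $RA right by 8*$RB and mask to
+// the element width", which is what extbl/extwl/extll compute when $RB holds
+// a byte offset (the hardware only looks at the low three bits of $RB).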
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(zappat:$imm GPRC:$RA),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
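+// zapnot keeps the bytes selected by an 8-bit mask and clears the rest, so an
+// AND whose constant has only all-ones or all-zeros byte lanes (zappat) fits
+// in one instruction; iZAPX derives the mask, e.g. "and $x, 0xffffffff"
+// becomes "zapnot $x, 15, $dst".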
+
+
+//Comparison, int
+//Using cmpbge only to compare the low bytes wastes most of what the instruction can do, but it still saves an instruction here
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
+
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+}
+
+
+let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+}
+
+//Load address
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//constant pool relocation patterns
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//jump table relocation patterns
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//misc ext patterns
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//0 disp patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//load address, relocated gpdist form
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+ mayLoad = 1 in {
+def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+ InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+ Constraints = "$RA = $RR",
+ DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (outs GPRC:$RA),
+ InOperandList = (ins s64imm:$DISP, GPRC:$RB),
+ mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
+ (WMB)>;
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
+ (MB)>;
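+// The more specific pattern is matched first: a membarrier whose fourth
+// operand is the constant 1 (which appears to mark a write-only barrier)
+// lowers to the cheaper wmb; every other combination gets a full mb.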
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OutOperandList = (outs F4RC:$RDEST),
+ InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQS : FPForm<0x17, 0x02A,
+ "fcmoveq $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D,
+ "fcmovge $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F,
+ "fcmovgt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E,
+ "fcmovle $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C,
+ "fcmovlt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B,
+ "fcmovne $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OutOperandList = (outs F8RC:$RDEST),
+ InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
+ [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
+ (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
+def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
+ (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31, isBarrier = 1 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x35, "fbne">;
+}
+
+//An ugly trick to encode the real branch opcode as an immediate we can use
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getZExtValue()) {
+ default: assert(0 && "Unknown branch type");
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ }
+}]>;
+
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64 Mfc 18.F800 Write hint - 64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair
+ int64_t val = (int64_t)N->getZExtValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair with zeroext
+ uint64_t uval = N->getZExtValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
+ (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
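+// Rough example: 0x12345678 splits into "ldah $r,0x1234($31)" followed by
+// "lda $r,0x5678($r)" (LH16/LL16 pick the two halves, bumping the high part
+// by one when the low half would sign-extend negative); the *Int variants
+// also zapnot with 15 to clear the upper 32 bits.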
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
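+// This uses the identity mulhs(a,b) = umulh(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0)
+// (mod 2^64): each CMOVGEr yields $31 (zero) when its condition register is
+// non-negative and the other operand otherwise; the two corrections are added
+// and subtracted from the unsigned high half.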
+
+//Strength-reduction tricks for multiplication by constants:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near to a power of 2
+//n is above a power of 2
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
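+// Worked example: 33 is one above 2^5, so it matches immRem1 and "mul $x, 33"
+// roughly lowers to "sll $x,5,$t; addq $t,$x,$dst"; 68 = 2^6 + 2^2 matches
+// immRemP2 and becomes a shift-by-6 added to a shift-by-2.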
+
+//n is below a power of 2
+//FIXME: figure out why something is truncating the imm to 32bits
+// this will fix 2007-11-27-mulneg3
+//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
+} //Added complexity
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 000000000000..85fbfd1affe2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller.
+// We also align some branch targets if we can do so for free.
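+// (Descriptive note: two memory operations hitting the same base/displacement
+//  within one aligned group of four instructions can trigger a replay trap on
+//  Alpha; the pass watches stack accesses (base register $30, the usual
+//  spiller output) and inserts "bis $31,$31,$31" nops so the second access
+//  falls into the next fetch group.)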
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
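+ // prev[0..2] remember the memory instructions already issued in the current
+ // aligned group of four; they are cleared again at each fetch boundary.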
+ DebugLoc dl;
+ unsigned count = 0;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0] &&
+ prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
+ prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //the block ended in an unconditional branch (or -alpha-align-all is set),
+ //so padding to a 4-instruction fetch boundary here is free
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
new file mode 100644
index 000000000000..186738c20c70
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -0,0 +1,62 @@
+//===- AlphaMachineFunctionInfo.h - Alpha machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Alpha-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMACHINEFUNCTIONINFO_H
+#define ALPHAMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// AlphaMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private Alpha target-specific information for each
+/// MachineFunction.
+class AlphaMachineFunctionInfo : public MachineFunctionInfo {
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+  /// use as the global base register. This is used in some PIC
+  /// relocation models.
+ unsigned GlobalBaseReg;
+
+  /// GlobalRetAddr - keeps track of the virtual register initialized for
+ /// the return address value.
+ unsigned GlobalRetAddr;
+
+  /// VarArgsOffset - The offset to the first vararg argument.
+ int VarArgsOffset;
+  /// VarArgsBase - The base FrameIndex used for vararg access.
+ int VarArgsBase;
+
+public:
+ AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
+ VarArgsOffset(0), VarArgsBase(0) {}
+
+ explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+ GlobalRetAddr(0),
+ VarArgsOffset(0),
+ VarArgsBase(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
+ void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+
+ int getVarArgsOffset() const { return VarArgsOffset; }
+ void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+ int getVarArgsBase() const { return VarArgsBase; }
+ void setVarArgsBase(int Base) { VarArgsBase = Base; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 000000000000..df8f157266e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,215 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(),
+ TII(tii) {
+}
+
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
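+// Illustrative example (not from the original source): assuming IMM_MULT is
+// 65536 and IMM_HIGH is 32767 (per Alpha.h), l == 0x12348000 gives
+// getUpper16(l) == 0x1235 and getLower16(l) == -0x8000, and indeed
+// 0x1235 * 65536 - 0x8000 == 0x12348000. Rounding the upper half up keeps the
+// lower half within the signed 16-bit range of LDA/LDAH displacements.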
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+  Reserved.set(Alpha::R15); // frame pointer
+  Reserved.set(Alpha::R29); // global pointer
+  Reserved.set(Alpha::R30); // stack pointer
+  Reserved.set(Alpha::R31); // zero register
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+    // If we have a frame pointer, turn the ADJUSTSTACKDOWN pseudo into an SP
+    // decrement and the ADJUSTSTACKUP pseudo into an SP increment, both
+    // implemented as 'lda $30, +/-<amt>($30)'.
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
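+      // For example (illustrative only): with a 16-byte stack alignment, an
+      // Amount of 20 rounds up to 32, since (20 + 15) / 16 * 16 == 32.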
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
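+      // LDA computes Ra = Rb + sext(disp), so the single LDA on $30 above is
+      // enough to move the stack pointer up or down by Amount.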
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+// Alpha has a slightly unusual stack layout:
+//   Args
+//   <- incoming SP
+//   fixed locals (and spills, callee-saved registers, etc.)
+//   <- FP
+//   variable-sized locals
+//   <- SP
+
+void
+AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ bool FP = TFI->hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DEBUG(errs() << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
+
+ if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
+ DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n");
+    // The offset does not fit in a signed 16-bit displacement, so use the
+    // temporary register R28 and move the original instruction off of the
+    // SP/FP base.
+    // Fix up the original instruction:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+    // Insert the new instruction that loads the high half of the offset:
+ MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+ TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
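+    // Net effect (sketch): the LDAH sets R28 = base + getUpper16(Offset)*65536,
+    // and the rewritten instruction then addresses getLower16(Offset)(R28), so
+    // the full offset is reachable even though each displacement field is only
+    // 16 bits wide.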
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+ return Alpha::R26;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg) {
+ std::string s(AlphaRegDesc[reg].Name);
+ return s;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 000000000000..1072bf73f199
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,60 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AlphaGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 000000000000..32120d750413
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,133 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
+
+// Reminder: FP = $15, RA = $26, PV = $27, GP = $29, SP = $30.
+// $28 is undefined after any and all calls.
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64, (add
+ // Volatile
+ R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+  // R15 (FP), R30 (SP), and R31 (zero) are reserved and never allocated.
+  R15, R30, R31)>;
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31)>; //zero
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 000000000000..4c92045d4696
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 000000000000..3703dd4fa9f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,85 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// This is Table 2-2 from the 21264 Compiler Writer's Guide, with some
+// modifications.
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+// Table 2-4: Instruction Class Latency in Cycles, with some modifications.
+
+def Alpha21264Itineraries : ProcessorItineraries<
+ [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..f1958fe6b5ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-selectiondag-info"
+#include "AlphaTargetMachine.h"
+using namespace llvm;
+
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
new file mode 100644
index 000000000000..3405cc0cdedd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASELECTIONDAGINFO_H
+#define ALPHASELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AlphaTargetMachine;
+
+class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
+ ~AlphaSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 000000000000..624a5e2ebd09
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,36 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AlphaGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 000000000000..70b311683f8b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,49 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+  /// This constructor initializes the data members to match those
+  /// of the specified triple.
+ ///
+ AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options. The definition of this function is
+  /// auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 000000000000..3b65d41be892
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,53 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeAlphaTarget() {
+ // Register the target.
+ RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout("e-f128:128:128-n64"),
+ FrameLowering(Subtarget),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this) {
+ setRelocationModel(Reloc::PIC_);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ PM.add(createAlphaLLRPPass(*this));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 000000000000..cf00e5875d34
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,65 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "AlphaInstrInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaFrameLowering.h"
+#include "AlphaSelectionDAGInfo.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ AlphaFrameLowering FrameLowering;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+ AlphaSelectionDAGInfo TSInfo;
+
+public:
+ AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const AlphaRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const AlphaTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
new file mode 100644
index 000000000000..a35e8846e072
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
@@ -0,0 +1,23 @@
+//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCAsmInfo.h"
+using namespace llvm;
+
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = ".gprel32";
+ WeakRefDirective = "\t.weak\t";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
new file mode 100644
index 000000000000..837844bd29a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct AlphaMCAsmInfo : public MCAsmInfo {
+ explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
new file mode 100644
index 000000000000..562052b6df67
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCTargetDesc.h"
+#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AlphaGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createAlphaMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAlphaMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+}
+
+static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+ RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+}
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
new file mode 100644
index 000000000000..b0619e6cb011
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMCTARGETDESC_H
+#define ALPHAMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheAlphaTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AlphaGenRegisterInfo.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AlphaGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ad0dd26aafb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMAlphaDesc
+ AlphaMCTargetDesc.cpp
+ AlphaMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..d55175fa69dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
new file mode 100644
index 000000000000..f7099b9ae975
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+llvm::Target llvm::TheAlphaTarget;
+
+extern "C" void LLVMInitializeAlphaTargetInfo() {
+ RegisterTarget<Triple::alpha, /*HasJIT=*/true>
+ X(TheAlphaTarget, "alpha", "Alpha [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.h b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
new file mode 100644
index 000000000000..a00ff4cc3275
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.h
@@ -0,0 +1,31 @@
+//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Blackfin back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_BLACKFIN_H
+#define TARGET_BLACKFIN_H
+
+#include "MCTargetDesc/BlackfinMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class FunctionPass;
+ class BlackfinTargetMachine;
+
+ FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/Blackfin.td b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
new file mode 100644
index 000000000000..cd90962a9540
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/Blackfin.td
@@ -0,0 +1,202 @@
+//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Blackfin Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
+ "Build for SDRAM">;
+
+def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
+ "Assume instruction cache lookaside buffers are enabled at runtime">;
+
+//===----------------------------------------------------------------------===//
+// Bugs in the silicon become workarounds in the compiler.
+// See http://www.analog.com/ for the full list of IC anomalies.
+//===----------------------------------------------------------------------===//
+
+def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
+ "Work around 05000074 - "
+ "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
+
+def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
+ "Work around 05000244 - "
+ "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
+
+def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
+ "Work around 05000245 - "
+ "Access in the Shadow of a Conditional Branch">;
+
+def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
+ "Work around 05000257 - "
+ "Interrupt/Exception During Short Hardware Loop">;
+
+def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
+ "Work around 05000283 - "
+ "System MMR Write Is Stalled Indefinitely when Killed">;
+
+def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
+ "Work around 05000312 - "
+ "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
+
+def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
+ "wa_killed_mmr", "true",
+ "Work around 05000315 - "
+ "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
+
+def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
+ "Work around 05000371 - "
+ "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
+
+def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
+ "Work around 05000426 - "
+ "Speculative Fetches of Indirect-Pointer Instructions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "BlackfinRegisterInfo.td"
+include "BlackfinCallingConv.td"
+include "BlackfinIntrinsics.td"
+include "BlackfinInstrInfo.td"
+
+def BlackfinInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Blackfin processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
+ : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
+
+def : Proc<"generic", "", []>;
+
+multiclass Core<string Name,string Suffix,
+ list<SubtargetFeature> Features> {
+ def : Proc<Name, Suffix, Features>;
+ def : Proc<Name, "", Features>;
+ def : Proc<Name, "-none", []>;
+}
+
+multiclass CoreEdinburgh<string Name>
+ : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
+ def : Proc<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS]>;
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS]>;
+}
+multiclass CoreBraemar<string Name>
+ : Core<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreStirling<string Name>
+ : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.4",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMoab<string Name>
+ : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreTeton<string Name>
+ : Core<Name, "-0.5",
+ [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
+ WA_RETS, WA_IND_CALL]> {
+ def : Proc<Name, "-0.3",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any",
+ [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
+ WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreKookaburra<string Name>
+ : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
+}
+multiclass CoreMockingbird<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+multiclass CoreBrodie<string Name>
+ : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
+ def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+ def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
+}
+
+defm BF512 : CoreBrodie<"bf512">;
+defm BF514 : CoreBrodie<"bf514">;
+defm BF516 : CoreBrodie<"bf516">;
+defm BF518 : CoreBrodie<"bf518">;
+defm BF522 : CoreMockingbird<"bf522">;
+defm BF523 : CoreKookaburra<"bf523">;
+defm BF524 : CoreMockingbird<"bf524">;
+defm BF525 : CoreKookaburra<"bf525">;
+defm BF526 : CoreMockingbird<"bf526">;
+defm BF527 : CoreKookaburra<"bf527">;
+defm BF531 : CoreEdinburgh<"bf531">;
+defm BF532 : CoreEdinburgh<"bf532">;
+defm BF533 : CoreEdinburgh<"bf533">;
+defm BF534 : CoreBraemar<"bf534">;
+defm BF536 : CoreBraemar<"bf536">;
+defm BF537 : CoreBraemar<"bf537">;
+defm BF538 : CoreStirling<"bf538">;
+defm BF539 : CoreStirling<"bf539">;
+defm BF542 : CoreMoab<"bf542">;
+defm BF544 : CoreMoab<"bf544">;
+defm BF548 : CoreMoab<"bf548">;
+defm BF549 : CoreMoab<"bf549">;
+defm BF561 : CoreTeton<"bf561">;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Blackfin : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = BlackfinInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
new file mode 100644
index 000000000000..6ba258beb2b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -0,0 +1,156 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Blackfin.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class BlackfinAsmPrinter : public AsmPrinter {
+ public:
+ BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Blackfin Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ };
+} // end of anonymous namespace
+
+#include "BlackfinGenAsmWriter.inc"
+
+extern "C" void LLVMInitializeBlackfinAsmPrinter() {
+ RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
+}
+
+void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ }
+}
+
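+/// printMemoryOperand - Print a base-plus-offset memory reference. As a sketch
+/// of the output, operands (P0, 8) print as "P0 + 8", while a zero offset
+/// prints as just "P0"; PrintAsmMemoryOperand below additionally wraps the
+/// reference in square brackets.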
+void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << " + ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
new file mode 100644
index 000000000000..0abc84c3c405
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinCallingConv.td
@@ -0,0 +1,30 @@
+//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Blackfin architectures.
+//
+//===----------------------------------------------------------------------===//
+
+// Blackfin C Calling convention.
+def CC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCIfSRet<CCAssignToReg<[P0]>>,
+ CCAssignToReg<[R0, R1, R2]>,
+ CCAssignToStack<4, 4>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Blackfin C return-value convention.
+def RetCC_Blackfin : CallingConv<[
+ CCIfType<[i16], CCPromoteToType<i32>>,
+ CCAssignToReg<[R0, R1]>
+]>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
new file mode 100644
index 000000000000..0b0984d2f777
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
@@ -0,0 +1,130 @@
+//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinFrameLowering.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) ||
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
+}
+
+// Always reserve a call frame. We don't have enough registers to adjust SP.
+bool BlackfinFrameLowering::
+hasReservedCallFrame(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ if (FrameSize%4) {
+ FrameSize = (FrameSize+3) & ~3;
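+    // Round the frame size up to the next multiple of 4 (e.g. 13 becomes 16);
+    // adding 3 and clearing the two low bits is equivalent to 4-byte alignment.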
+ MFI->setStackSize(FrameSize);
+ }
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+ return;
+ }
+
+ // emit a LINK instruction
+ if (FrameSize <= 0x3ffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+ return;
+ }
+
+ // Frame is too big, do a manual LINK:
+ // [--SP] = RETS;
+ // [--SP] = FP;
+ // FP = SP;
+ // P1 = -FrameSize;
+ // SP = SP + P1;
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::RETS, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::FP, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+ .addReg(BF::SP);
+ RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+ .addReg(BF::SP, RegState::Kill)
+ .addReg(BF::P1, RegState::Kill);
+
+}
+
+void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+ return;
+ }
+
+ // emit an UNLINK instruction
+ BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+void BlackfinFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const TargetRegisterClass *RC = BF::DPRegisterClass;
+
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
new file mode 100644
index 000000000000..726fa2c063f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -0,0 +1,47 @@
+//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_FRAMELOWERING_H
+#define BLACKFIN_FRAMELOWERING_H
+
+#include "Blackfin.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class BlackfinSubtarget;
+
+class BlackfinFrameLowering : public TargetFrameLowering {
+protected:
+ const BlackfinSubtarget &STI;
+
+public:
+ explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
new file mode 100644
index 000000000000..215ca43ea338
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -0,0 +1,180 @@
+//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Blackfin target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinTargetMachine.h"
+#include "BlackfinRegisterInfo.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// BlackfinDAGToDAGISel - Blackfin-specific code to select Blackfin machine
+/// instructions for SelectionDAG operations.
+namespace {
+ class BlackfinDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
+ /// can make the right decision when generating code for different targets.
+ //const BlackfinSubtarget &Subtarget;
+ public:
+ BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ virtual void PostprocessISelDAG();
+
+ virtual const char *getPassName() const {
+ return "Blackfin DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "BlackfinGenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Walk the DAG after instruction selection, fixing register class issues.
+ void FixRegisterClasses(SelectionDAG &DAG);
+
+ const BlackfinInstrInfo &getInstrInfo() {
+ return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
+ }
+ const BlackfinRegisterInfo *getRegisterInfo() {
+ return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
+ }
+ };
+} // end anonymous namespace
+
+FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new BlackfinDAGToDAGISel(TM, OptLevel);
+}
+
+void BlackfinDAGToDAGISel::PostprocessISelDAG() {
+ FixRegisterClasses(*CurDAG);
+}
+
+SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+    // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or
+    // ADDpp SP, Px.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ }
+ }
+
+ return SelectCode(N);
+}
+
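+// SelectADDRspii - Match the ADDRspii addressing mode: either a bare frame
+// index, or a frame index plus a non-negative, word-aligned constant offset.
+// On a match, Base and Offset are filled in with target operands.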
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
+ SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
+ (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline bool isCC(const TargetRegisterClass *RC) {
+ return BF::AnyCCRegClass.hasSubClassEq(RC);
+}
+
+static inline bool isDCC(const TargetRegisterClass *RC) {
+ return BF::DRegClass.hasSubClassEq(RC) || isCC(RC);
+}
+
+static void UpdateNodeOperand(SelectionDAG &DAG,
+ SDNode *N,
+ unsigned Num,
+ SDValue Val) {
+ SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
+ ops[Num] = Val;
+ SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New);
+}
+
+// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
+// choosing the proper register classes.
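+// CC can only be copied to and from D registers. When a machine node defines a
+// value in a CC-only class and a user expects a class that is neither CC nor D
+// (or vice versa), a COPY_TO_REGCLASS into the D register class is inserted
+// between the def and the use.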
+void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
+ const BlackfinInstrInfo &TII = getInstrInfo();
+ const BlackfinRegisterInfo *TRI = getRegisterInfo();
+ DAG.AssignTopologicalOrder();
+ HandleSDNode Dummy(DAG.getRoot());
+
+ for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
+ NI != DAG.allnodes_end(); ++NI) {
+ if (NI->use_empty() || !NI->isMachineOpcode())
+ continue;
+ const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
+ for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
+ if (!UI->isMachineOpcode())
+ continue;
+
+ if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
+ continue;
+ const TargetRegisterClass *DefRC =
+ TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
+
+ const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
+ if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
+ continue;
+ const TargetRegisterClass *UseRC =
+ TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
+ if (!DefRC || !UseRC)
+ continue;
+ // We cannot copy CC <-> !(CC/D)
+ if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
+ SDNode *Copy =
+ DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ NI->getDebugLoc(),
+ MVT::i32,
+ UI.getUse().get(),
+ DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
+ UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
+ }
+ }
+ }
+ DAG.setRoot(Dummy.getValue());
+}
+
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
new file mode 100644
index 000000000000..d5728324de87
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -0,0 +1,643 @@
+//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Blackfin uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinISelLowering.h"
+#include "BlackfinTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setStackPointerRegisterToSaveRestore(BF::SP);
+ setIntDivIsCheap(false);
+
+ // Set up the legal register classes.
+ addRegisterClass(MVT::i32, BF::DRegisterClass);
+ addRegisterClass(MVT::i16, BF::D16RegisterClass);
+
+ computeRegisterProperties();
+
+ // Blackfin doesn't have i1 loads or stores
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+  // i16 registers support only a limited set of operations.
+ setOperationAction(ISD::AND, MVT::i16, Promote);
+ setOperationAction(ISD::OR, MVT::i16, Promote);
+ setOperationAction(ISD::XOR, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
+ // immediately.
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::SETCC, MVT::i16, Promote);
+
+ // Blackfin has no division
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ // No carry-in operations.
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+
+ // Blackfin has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // i32 has native CTPOP, but not CTLZ/CTTZ
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+
+ // READCYCLECOUNTER needs special type legalization.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setMinFunctionAlignment(2);
+}
+
+const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case BFISD::CALL: return "BFISD::CALL";
+ case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
+ case BFISD::Wrapper: return "BFISD::Wrapper";
+ }
+}
+
+MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+ // SETCC always sets the CC register. Technically that is an i1 register, but
+ // that type is not legal, so we treat it as an i32 register.
+ return MVT::i32;
+}
+
+SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ int JTI = cast<JumpTableSDNode>(Op)->getIndex();
+
+ Op = DAG.getTargetJumpTable(JTI, MVT::i32);
+ return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
+}
+
+SDValue
+BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
+ BF::PRegisterClass : BF::DRegisterClass;
+ assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
+ assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ unsigned ObjSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+BlackfinTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+  // CCValAssign - represents the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue Opi = OutVals[i];
+
+ // Expand to i32 if necessary
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::ZExt:
+ Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ case CCValAssign::AExt:
+ Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
+ break;
+ }
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode()) {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ } else {
+ return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
+ }
+}
+
+SDValue
+BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Blackfin target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
+ CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
+ int Offset = VA.getLocMemOffset();
+ assert(Offset%4 == 0 && "Unaligned LocMemOffset");
+ assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type");
+ SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
+ SDValue OffsetN = DAG.getIntPtrConstant(Offset);
+ OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+  // Since all the store nodes are independent of each other, merge them into a
+  // single TokenFactor node.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<EVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
+ InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &RV = RVLocs[i];
+ unsigned Reg = RV.getLocReg();
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getLocVT(), InFlag);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ Chain = Chain.getValue(1);
+
+ // Callee is responsible for extending any i16 return values.
+ switch (RV.getLocInfo()) {
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
+ DAG.getValueType(RV.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate to valtype
+ if (RV.getLocInfo() != CCValAssign::Full)
+ Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+// Expansion of ADDE / SUBE. This is a bit involved since Blackfin doesn't have
+// add-with-carry instructions.
+SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
+ // Operands: lhs, rhs, carry-in (AC0 flag)
+ // Results: sum, carry-out (AC0 flag)
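+  //
+  // The expansion: materialize the incoming AC0 carry into a 32-bit register,
+  // do the base add/sub (which updates AC0), capture that intermediate carry,
+  // add the incoming carry to the partial sum (updating AC0 again), then OR
+  // the captured intermediate carry back into AC0 so the outgoing flag
+  // reflects a carry from either step.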
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB;
+
+ // zext incoming carry flag in AC0 to 32 bits
+ SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ Op.getOperand(2));
+ CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
+ SDValue(CarryIn, 0));
+
+ // Add operands, produce sum and carry flag
+ SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ Op.getOperand(0), Op.getOperand(1));
+
+ // Store intermediate carry from Sum
+ SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
+ /* flag= */ SDValue(Sum, 1));
+
+ // Add incoming carry, again producing an output flag
+ Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ SDValue(Sum, 0), SDValue(CarryIn, 0));
+
+ // Update AC0 with the intermediate carry, producing a flag.
+ SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
+ SDValue(Carry1, 0));
+
+ // Compose (i32, flag) pair
+ SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
+ return DAG.getMergeValues(ops, 2, dl);
+}
+
+SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Blackfin.");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+  // Frame & Return address. Currently unimplemented.
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::ADDE:
+ case ISD::SUBE: return LowerADDE(Op, DAG);
+ }
+}
+
+void
+BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::READCYCLECOUNTER: {
+ // The low part of the cycle counter is in CYCLES, the high part in
+ // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
+ // CYCLES2 last.
+ SDValue TheChain = N->getOperand(0);
+ SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
+ SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
+ // Outgoing chain. If we were to use the chain from lo instead, it would be
+ // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
+ // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
+ // read beyond the next CYCLES read, leading to invalid results.
+ Results.push_back(hi.getValue(1));
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Blackfin Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+BlackfinTargetLowering::ConstraintType
+BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() != 1)
+ return TargetLowering::getConstraintType(Constraint);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return C_RegisterClass;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return C_RegisterClass;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return C_Register;
+ }
+
+  // Not implemented: q0-q7, qA. Use {R2} etc instead.
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+BlackfinTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return CW_Register;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return CW_SpecificReg;
+ }
+ return weight;
+}
+
+/// getRegForInlineAsmConstraint - Return the register number and class for a
+/// C_Register constraint.
+std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
+ using namespace BF;
+
+ if (Constraint.size() != 1)
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ switch (Constraint[0]) {
+ // Standard constraints
+ case 'r':
+ return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
+
+ // Blackfin-specific constraints
+ case 'a': return Pair(0U, PRegisterClass);
+ case 'd': return Pair(0U, DRegisterClass);
+ case 'e': return Pair(0U, AccuRegisterClass);
+ case 'A': return Pair(A0, AccuRegisterClass);
+ case 'B': return Pair(A1, AccuRegisterClass);
+ case 'b': return Pair(0U, IRegisterClass);
+ case 'v': return Pair(0U, BRegisterClass);
+ case 'f': return Pair(0U, MRegisterClass);
+ case 'C': return Pair(CC, JustCCRegisterClass);
+ case 'x': return Pair(0U, GRRegisterClass);
+ case 'w': return Pair(0U, ALLRegisterClass);
+ case 'Z': return Pair(P3, PRegisterClass);
+ case 'Y': return Pair(P1, PRegisterClass);
+ case 'z': return Pair(0U, zConsRegisterClass);
+ case 'D': return Pair(0U, DConsRegisterClass);
+ case 'W': return Pair(0U, WConsRegisterClass);
+ case 'c': return Pair(0U, cConsRegisterClass);
+ case 't': return Pair(0U, tConsRegisterClass);
+ case 'u': return Pair(0U, uConsRegisterClass);
+ case 'k': return Pair(0U, kConsRegisterClass);
+ case 'y': return Pair(0U, yConsRegisterClass);
+ }
+
+ // Not implemented: q0-q7, qA. Use {R2} etc instead.
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool BlackfinTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Blackfin target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
new file mode 100644
index 000000000000..b65775b9285d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -0,0 +1,83 @@
+//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Blackfin uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_ISELLOWERING_H
+#define BLACKFIN_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Blackfin.h"
+
+namespace llvm {
+
+ namespace BFISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ Wrapper // Address wrapper
+ };
+ }
+
+ class BlackfinTargetLowering : public TargetLowering {
+ public:
+ BlackfinTargetLowering(TargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ };
+} // end namespace llvm
+
+#endif // BLACKFIN_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
new file mode 100644
index 000000000000..d8e6e252e787
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrFormats.td
@@ -0,0 +1,34 @@
+//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "BF";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Single-word (16-bit) instructions
+class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
+
+// Double-word (32-bit) instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern> {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
new file mode 100644
index 000000000000..d190ae7984b2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -0,0 +1,256 @@
+//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "BlackfinGenInstrInfo.inc"
+
+using namespace llvm;
+
+BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
+ : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
+ RI(ST, *this),
+ Subtarget(ST) {}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::LOAD32fi:
+ case BF::LOAD16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case BF::STORE32fi:
+ case BF::STORE16fi:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned BlackfinInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ llvm_unreachable("Implement conditional branches!");
+}
+
+void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (BF::D16RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (BF::DRegClass.contains(DestReg)) {
+ if (SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
+ return;
+ }
+ if (SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+ if (BF::DRegClass.contains(SrcReg)) {
+ if (DestReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ return;
+ }
+ if (DestReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ }
+
+ if (DestReg == BF::NCC && SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (DestReg == BF::CC && SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("Bad reg-to-reg copy");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return Test.hasSubClassEq(RC);
+}
+
+void
+BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg,
+ bool isKill,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+
+ if (inClass(BF::DPRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE32fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE16fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::STORE8fi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
+ RC->getName()).c_str());
+}
+
+void BlackfinInstrInfo::
+storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("storeRegToAddr not implemented");
+}
+
+void
+BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (inClass(BF::DPRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::D16RegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
+ BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0);
+ return;
+ }
+
+ llvm_unreachable("Cannot load regclass from stack slot");
+}
+
+void BlackfinInstrInfo::
+loadRegFromAddr(MachineFunction &MF,
+ unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("loadRegFromAddr not implemented");
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
new file mode 100644
index 000000000000..d22ddf0d7313
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -0,0 +1,81 @@
+//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFININSTRUCTIONINFO_H
+#define BLACKFININSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "BlackfinRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "BlackfinGenInstrInfo.inc"
+
+namespace llvm {
+
+ class BlackfinInstrInfo : public BlackfinGenInstrInfo {
+ const BlackfinRegisterInfo RI;
+ const BlackfinSubtarget& Subtarget;
+ public:
+ explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned
+ InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
new file mode 100644
index 000000000000..5b59d7769c7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -0,0 +1,862 @@
+//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "BlackfinInstrFormats.td"
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Transformations
+//===----------------------------------------------------------------------===//
+
+def trailingZeros_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ MVT::i32);
+}]>;
+
+def trailingOnes_xform : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ MVT::i32);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediates
+//===----------------------------------------------------------------------===//
+
+def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
+
+def uimm5m2 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 2 == 0 && isUInt<5>(value);
+}]>;
+
+def uimm6m4 : PatLeaf<(imm), [{
+ uint64_t value = N->getZExtValue();
+ return value % 4 == 0 && isUInt<6>(value);
+}]>;
+
+def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
+def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
+
+def ximm16 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value < (1<<16) && value >= -(1<<15);
+}]>;
+
+def imm17m2 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 2 == 0 && isInt<17>(value);
+}]>;
+
+def imm18m4 : PatLeaf<(imm), [{
+ int64_t value = N->getSExtValue();
+ return value % 4 == 0 && isInt<18>(value);
+}]>;
+
+// 32-bit bitmask transformed to a bit number
+def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(N->getZExtValue());
+}], trailingZeros_xform>;
+
+// 32-bit inverse bitmask transformed to a bit number
+def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
+ return isPowerOf2_32(~N->getZExtValue());
+}], trailingOnes_xform>;
+
+//===----------------------------------------------------------------------===//
+// Operands
+//===----------------------------------------------------------------------===//
+
+def calltarget : Operand<iPTR>;
+
+def brtarget : Operand<OtherVT>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemoryOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBfin<outs, ins, asmstr, pattern>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "${:comment}ADJCALLSTACKDOWN $amt",
+ [(BfinCallseqStart timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment}ADJCALLSTACKUP $amt1 $amt2",
+ [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-9. Program Flow Control Instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+
+let isIndirectBranch = 1 in
+def JUMPp : F1<(outs), (ins P:$target),
+ "JUMP ($target);",
+ [(brind P:$target)]>;
+
+// TODO JUMP (PC-P)
+
+// NOTE: assembler chooses between JUMP.S and JUMP.L
+def JUMPa : F1<(outs), (ins brtarget:$target),
+ "jump $target;",
+ [(br bb:$target)]>;
+
+def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
+ "if $cc jump $target;",
+ [(brcond AnyCC:$cc, bb:$target)]>;
+}
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
+def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
+ "call $func;", []>;
+def CALLp: F1<(outs), (ins P:$func, variable_ops),
+ "call ($func);", [(BfinCall P:$func)]>;
+}
+
+let isReturn = 1,
+ isTerminator = 1,
+ isBarrier = 1,
+ Uses = [RETS] in
+def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-10. Load / Store Instructions
+//===----------------------------------------------------------------------===//
+
+// Immediate constant loads
+
+// sext immediate, i32 D/P regs
+def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set DP:$dst, imm7:$src)]>;
+
+// zext immediate, i32 reg groups 0-3
+def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (z);",
+ [(set GR:$dst, uimm16:$src)]>;
+
+// sext immediate, i32 reg groups 0-3
+def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+ "$dst = $src (x);",
+ [(set GR:$dst, imm16:$src)]>;
+
+// Pseudo-instruction for loading a general 32-bit constant.
+def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
+ [(set GR:$dst, imm:$src)]>;
+
+def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+ "$dst.h = $src; $dst.l = $src;", []>;
+
+
+// 16-bit immediate, i16 reg groups 0-3
+def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
+ "$dst = $src;", []>;
+
+def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
+ (LOAD32sym tglobaladdr:$addr)>;
+
+def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
+ (LOAD32sym tjumptable:$addr)>;
+
+// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
+// emit GR16 instructions. As a hack, we use this fake instruction instead.
+def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
+ "$dst = $src;",
+ [(set D16:$dst, ximm16:$src)]>;
+
+// Memory loads with patterns
+
+def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
+ "$dst = [$ptr];",
+ [(set DP:$dst, (load P:$ptr))]>;
+
+// Pseudo-instruction for loading a stack slot
+def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set DP:$dst, (load ADDRspii:$mem))]>;
+
+// Note: Expands to multiple insns
+def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = [$mem];",
+ [(set D16:$dst, (load ADDRspii:$mem))]>;
+
+// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
+// Replaced with Load D + CC=D
+def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
+ "${:comment}FI $dst = B[$mem];",
+ [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
+
+def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
+
+def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = [$ptr + $off];",
+ [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
+
+def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = W[$ptr] (z);",
+ [(set D:$dst, (zextloadi16 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
+
+def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi16 (add P:$ptr,
+ uimm5m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
+ (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (z);",
+ [(set D:$dst,
+ (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
+ (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = w[$ptr] (x);",
+ [(set D:$dst, (sextloadi16 P:$ptr))]>;
+
+def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
+
+def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = w[$ptr + $off] (x);",
+ [(set D:$dst,
+ (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
+ "$dst = w[$ptr];",
+ [(set D16:$dst, (load PI:$ptr))]>;
+
+def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = B[$ptr] (z);",
+ [(set D:$dst, (zextloadi8 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
+def : Pat<(i16 (extloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+def : Pat<(i16 (zextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (z);",
+ [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
+ (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
+def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+ lo16)>;
+
+def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
+ "$dst = b[$ptr] (x);",
+ [(set D:$dst, (sextloadi8 P:$ptr))]>;
+
+def : Pat<(i16 (sextloadi8 P:$ptr)),
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
+
+def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+ "$dst = b[$ptr + $off] (x);",
+ [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
+ (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
+ lo16)>;
+// Memory loads without patterns
+
+let mayLoad = 1 in {
+
+multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";"> {
+ def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
+ def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
+}
+multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
+ string mem="", string suf=";">
+ : LOAD_incdec<drc, prc, mem, suf> {
+ def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
+ !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
+}
+
+defm LOAD32p: LOAD_incdec<DP, P>;
+defm LOAD32i: LOAD_incdec<D, I>;
+defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
+defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
+defm LOADhi: LOAD_incdec<D16, I, "w">;
+defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
+defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
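+// For illustration, the multiclasses above expand each defm into _inc/_dec (and
+// _post) variants; e.g. LOAD8z32p_inc prints as "$dst = b[$ptr++] (z);" and
+// LOAD16z32p_post as "$dst = w[$ptr++$off] (z);".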
+
+def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+// Note: $fp MUST be FP
+def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
+ "$dst = [$fp - $off];", []>;
+
+def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
+ "$dst = [$ptr];", []>;
+def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
+ "$dst = [$ptr ++ $off];", []>;
+
+
+
+def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+ "$dst = w[$ptr ++ $off];", []>;
+
+
+}
+
+// Memory stores with patterns
+def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
+ "[$ptr] = $val;",
+ [(store DP:$val, P:$ptr)]>;
+
+// Pseudo-instructions for storing to a stack slot
+def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store DP:$val, ADDRspii:$mem)]>;
+
+// Note: This stack-storing pseudo-instruction is expanded to multiple insns
+def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
+ "${:comment}FI [$mem] = $val;",
+ [(store D16:$val, ADDRspii:$mem)]>;
+
+// Pseudo-instruction for storing an AnyCC register to a stack slot.
+// Replaced with D=CC + STORE byte
+def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
+ "${:comment}FI b[$mem] = $val;",
+ [(store AnyCC:$val, ADDRspii:$mem)]>;
+
+def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
+
+def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+ "[$ptr + $off] = $val;",
+ [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
+
+def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
+ "w[$ptr] = $val;",
+ [(store D16:$val, PI:$ptr)]>;
+
+def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
+ "b[$ptr] = $val;",
+ [(truncstorei8 D:$val, P:$ptr)]>;
+
+def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
+ "b[$ptr + $off] = $val;",
+ [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
+
+let Constraints = "$ptr = $ptr_wb" in {
+
+multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
+ int off=4, string pre=""> {
+ def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr++] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
+ def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+ !strconcat(pre, "[$ptr--] = $val;"),
+ [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
+ (ineg off)))]>;
+}
+
+defm STORE32p: STORE_incdec<DP, P>;
+defm STORE16i: STORE_incdec<D16, I, 2, "w">;
+defm STORE8p: STORE_incdec<D, P, 1, "b">;
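+// For illustration, defm STORE8p above yields STORE8p_inc ("b[$ptr++] = $val;")
+// and STORE8p_dec ("b[$ptr--] = $val;"), matching post_store with offsets of
+// +1 and -1 respectively.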
+
+def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
+ "[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
+
+def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
+ "w[$ptr ++ $off] = $val;",
+ [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
+}
+
+// Memory stores without patterns
+
+let mayStore = 1 in {
+
+// Note: only works for $fp == FP
+def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
+ "[$fp - $off] = $val;", []>;
+
+def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
+ "[$ptr] = $val;", []>;
+
+def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr++] = $val;", []>;
+
+def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+ "[$ptr--] = $val;", []>;
+
+def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
+ "[$ptr ++ $off] = $val;", []>;
+}
+
+def : Pat<(truncstorei16 D:$val, PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ lo16), PI:$ptr)>;
+
+def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
+ (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
+ hi16), PI:$ptr)>;
+
+def : Pat<(truncstorei8 D16L:$val, P:$ptr),
+ (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
+ lo16),
+ P:$ptr)>;
+
+//===----------------------------------------------------------------------===//
+// Table C-11. Move Instructions.
+//===----------------------------------------------------------------------===//
+
+def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
+ "$dst = $src;",
+ []>;
+
+let Constraints = "$src1 = $dst" in
+def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
+ "if $cc $dst = $src2;",
+ [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
+
+let Defs = [AZ, AN, AC0, V] in {
+def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (z);",
+ [(set D:$dst, (zext D16L:$src))]>;
+
+def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
+ "$dst = $src (x);",
+ [(set D:$dst, (sext D16L:$src))]>;
+
+def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (z);",
+ [(set D:$dst, (and D:$src, 0xff))]>;
+
+def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
+ "$dst = $src.b (x);",
+ [(set D:$dst, (sext_inreg D:$src, i8))]>;
+
+}
+
+def : Pat<(sext_inreg D16L:$src, i8),
+ (EXTRACT_SUBREG (MOVEsext8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ D16L:$src,
+ lo16)),
+ lo16)>;
+
+def : Pat<(sext_inreg D:$src, i16),
+ (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(and D:$src, 0xffff),
+ (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
+
+def : Pat<(i32 (anyext D16L:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
+ lo16)>;
+
+// TODO Dreg = Dreg_byte (X/Z)
+
+// TODO Accumulator moves
+
+//===----------------------------------------------------------------------===//
+// Table C-12. Stack Control Instructions
+//===----------------------------------------------------------------------===//
+
+let Uses = [SP], Defs = [SP] in {
+def PUSH: F1<(outs), (ins ALL:$src),
+ "[--sp] = $src;", []> { let mayStore = 1; }
+
+// NOTE: POP does not work for DP regs; use LOAD instead.
+def POP: F1<(outs ALL:$dst), (ins),
+ "$dst = [sp++];", []> { let mayLoad = 1; }
+}
+
+// TODO: push/pop multiple
+
+def LINK: F2<(outs), (ins i32imm:$amount),
+ "link $amount;", []>;
+
+def UNLINK: F2<(outs), (ins),
+ "unlink;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-13. Control Code Bit Management Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
+ def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
+
+ def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
+
+ def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ []>;
+
+ def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
+ !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+ [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
+}
+
+defm SETEQ : SETCC<seteq, setne, "==">;
+defm SETLT : SETCC<setlt, setge, "<">;
+defm SETLE : SETCC<setle, setgt, "<=">;
+defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
+defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
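+// For illustration, defm SETULT above produces SETULTdd/SETULTri/SETULTpp and
+// SETULTri_not, all printed as "cc = $a < $b (iu);"; the signed variants use
+// the default ";" suffix.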
+
+def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
+ "cc = $a == $b;",
+ [(set NotCC:$cc, (setne D:$a, D:$b))]>;
+
+def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
+def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
+def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
+def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
+
+// TODO: compare pointer for P-P comparisons
+// TODO: compare accumulator
+
+let Defs = [AC0] in
+def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
+ "ac0 \\|= cc;", []>;
+
+let Uses = [AC0] in
+def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
+ "cc = ac0;", []>;
+
+def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
+ "cc = !cc;", []>;
+
+def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
+ "$dst = $cc;", []>;
+
+def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
+ "$dst = cc;", []>;
+
+def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
+ "cc = $src;",
+ [(set AnyCC:$cc, (setne D:$src, 0))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-14. Logical Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 & $src2;",
+ [(set D:$dst, (and D:$src1, D:$src2))]>;
+
+def NOT: F1<(outs D:$dst), (ins D:$src),
+ "$dst = ~$src;",
+ [(set D:$dst, (not D:$src))]>;
+
+def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 \\| $src2;",
+ [(set D:$dst, (or D:$src1, D:$src2))]>;
+
+def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 ^ $src2;",
+ [(set D:$dst, (xor D:$src1, D:$src2))]>;
+
+// missing: BXOR, BXORSHIFT
+
+//===----------------------------------------------------------------------===//
+// Table C-15. Bit Operations Instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
+ "bitclr($dst, $src2);",
+ [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
+
+def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bitset($dst, $src2);",
+ [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
+
+def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+ "bittgl($dst, $src2);",
+ [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
+}
+
+def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = bittst($src1, $src2);",
+ [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+ "cc = !bittst($src1, $src2);",
+ [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
+ (i32 0)))]>;
+
+// TODO: DEPOSIT, EXTRACT, BITMUX
+
+def ONES: F2<(outs D16L:$dst), (ins D:$src),
+ "$dst = ones $src;",
+ [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
+
+def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Table C-16. Shift / Rotate Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHIFT32<SDNode opnode, string ops> {
+ def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
+ def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
+ !subst("XX", ops, "$dst XX= $amount;"),
+ [(set D:$dst, (opnode D:$src, D:$amount))]>;
+}
+
+let Defs = [AZ, AN, V, VS],
+ Constraints = "$src = $dst" in {
+defm SRA : SHIFT32<sra, ">>>">;
+defm SRL : SHIFT32<srl, ">>">;
+defm SLL : SHIFT32<shl, "<<">;
+}
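+// For illustration, defm SRA above yields SRAi/SRAr, both printed as
+// "$dst >>>= $amount;", matching sra with an immediate or a register amount.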
+
+// TODO: automatic switching between 2-addr and 3-addr (?)
+
+let Defs = [AZ, AN, V, VS] in {
+def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D:$dst, (shl D:$src, D16L:$amount))]>;
+
+// Arithmetic left shifts saturate on overflow.
+def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+ "$dst = ashift $src by $amount;",
+ [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
+
+def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >>> $amount;",
+ [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
+
+def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src >> $amount;",
+ [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
+
+// Arithmetic left shifts saturate on overflow.
+def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = ashift $src BY $amount;",
+ [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
+
+def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+ "$dst = $src << $amount;",
+ [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
+
+def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+ "$dst = lshift $src by $amount;",
+ [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-17. Arithmetic Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: ABS
+
+let Defs = [AZ, AN, AC0, V, VS] in {
+
+def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D:$dst, (add D:$src1, D:$src2))]>;
+
+def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 + $src2;",
+ [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
+
+let Constraints = "$src1 = $dst" in
+def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
+ "$dst += $src2;",
+ [(set D:$dst, (add D:$src1, imm7:$src2))]>;
+
+def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D:$dst, (sub D:$src1, D:$src2))]>;
+
+def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 - $src2;",
+ [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
+
+}
+
+def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
+def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
+
+let Defs = [AZ, AN, V, VS] in
+def NEG: F1<(outs D:$dst), (ins D:$src),
+ "$dst = -$src;",
+ [(set D:$dst, (ineg D:$src))]>;
+
+// No pattern; it would confuse isel to have two i32 = i32+i32 patterns.
+def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
+ "$dst = $src1 + $src2;", []>;
+
+let Constraints = "$src1 = $dst" in
+def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
+ "$dst += $src2;", []>;
+
+let Defs = [AZ, AN, V] in
+def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
+ "$dst = $src1 + $src2 (rnd20);", []>;
+
+let Defs = [V, VS] in {
+def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
+
+def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (ih);",
+ [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
+
+def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
+
+def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+ "$dst = $src1 * $src2 (is);",
+ [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
+}
+
+
+let Constraints = "$src1 = $dst" in
+def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst *= $src2;",
+ [(set D:$dst, (mul D:$src1, D:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-18. External Event Management Instructions
+//===----------------------------------------------------------------------===//
+
+def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
+def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
+def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
+def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
+def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
+def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
+def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
+def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
+def NOP : F1<(outs), (ins), "nop;", []>;
+def MNOP : F2<(outs), (ins), "mnop;", []>;
+def ABORT : F1<(outs), (ins), "abort;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-19. Cache Control Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Table C-20. Video Pixel Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align8($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 24)),
+ (srl D:$src2, (i32 8))))]>;
+
+def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 16)),
+ (srl D:$src2, (i32 16))))]>;
+
+def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+ "$dst = align16($src1, $src2);",
+ [(set D:$dst, (or (shl D:$src1, (i32 8)),
+ (srl D:$src2, (i32 24))))]>;
+
+def DISALGNEXCPT : F2<(outs), (ins), "disalgnexcpt;", []>;
+
+// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
+// BYTEPACK, BYTEUNPACK
+
+// Table C-21. Vector Operations Instructions
+
+// Patterns
+def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
+ (CALLa tglobaladdr:$dst)>;
+def : Pat<(BfinCall (i32 texternalsym:$dst)),
+ (CALLa texternalsym:$dst)>;
+def : Pat<(i16 (trunc D:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
new file mode 100644
index 000000000000..ae8ee9e2a1a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -0,0 +1,104 @@
+//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace bfinIntrinsic {
+
+ enum ID {
+ last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_bfin_intrinsics
+ };
+
+}
+
+std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return std::string();
+ assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned
+BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "BlackfinGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
new file mode 100644
index 000000000000..7c4b5a9967d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
@@ -0,0 +1,32 @@
+//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BLACKFININTRINSICS_H
+#define BLACKFININTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
new file mode 100644
index 000000000000..ce21b082376f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsics.td
@@ -0,0 +1,34 @@
+//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Blackfin-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "bfin", isTarget = 1 in {
+
+//===----------------------------------------------------------------------===//
+// Core synchronisation etc.
+//
+// These intrinsics have side effects. Each represents a single instruction, but
+// workarounds are sometimes required depending on the CPU.
+
+// Execute csync instruction with workarounds
+def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
+ Intrinsic<[]>;
+
+// Execute ssync instruction with workarounds
+def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
+ Intrinsic<[]>;
+
+// Execute idle instruction with workarounds
+def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
+ Intrinsic<[]>;
+
+}
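+// From LLVM IR these are reachable as calls such as (illustrative):
+//   call void @llvm.bfin.csync()
+// which the CSYNC/SSYNC/IDLE definitions in BlackfinInstrInfo.td select into
+// the corresponding single instructions.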
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
new file mode 100644
index 000000000000..3a7c104ee055
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -0,0 +1,360 @@
+//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "BlackfinRegisterInfo.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
+ const TargetInstrInfo &tii)
+ : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
+
+const unsigned*
+BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ using namespace BF;
+ static const unsigned CalleeSavedRegs[] = {
+ FP,
+ R4, R5, R6, R7,
+ P3, P4, P5,
+ 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector
+BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ using namespace BF;
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AZ);
+ Reserved.set(AN);
+ Reserved.set(AQ);
+ Reserved.set(AC0);
+ Reserved.set(AC1);
+ Reserved.set(AV0);
+ Reserved.set(AV0S);
+ Reserved.set(AV1);
+ Reserved.set(AV1S);
+ Reserved.set(V);
+ Reserved.set(VS);
+ Reserved.set(CYCLES).set(CYCLES2);
+ Reserved.set(L0);
+ Reserved.set(L1);
+ Reserved.set(L2);
+ Reserved.set(L3);
+ Reserved.set(SP);
+ Reserved.set(RETS);
+ if (TFI->hasFP(MF))
+ Reserved.set(FP);
+ return Reserved;
+}
+
+bool BlackfinRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
+// Emit instructions to add delta to a D or P register. ScratchReg must be in
+// the same register class as Reg (D or P).
+void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const {
+ if (!delta)
+ return;
+ if (isInt<7>(delta)) {
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
+ .addReg(Reg) // No kill on two-addr operand
+ .addImm(delta);
+ return;
+ }
+
+ // We must load delta into ScratchReg and add that.
+ loadConstant(MBB, I, DL, ScratchReg, delta);
+ if (BF::PRegClass.contains(Reg)) {
+ assert(BF::PRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a P register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
+ assert(BF::DRegClass.contains(ScratchReg) &&
+ "ScratchReg must be a D register");
+ BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill);
+ }
+}
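+// Illustrative example: adjustRegister(MBB, I, DL, BF::SP, BF::P1, -128) does
+// not fit the 7-bit immediate form, so it emits roughly
+//   p1 = -128 (x);
+//   sp = sp + p1;
+// while small deltas collapse to a single "sp += imm;" (ADDpp_imm7).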
+
+// Emit instructions to load a constant into D/P register
+void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const {
+ if (isInt<7>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
+ return;
+ }
+
+ if (isUInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
+ return;
+ }
+
+ if (isInt<16>(value)) {
+ BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
+ return;
+ }
+
+ // We must split into halves
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
+ .addImm((value >> 16) & 0xffff)
+ .addReg(Reg, RegState::ImplicitDefine);
+ BuildMI(MBB, I, DL,
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
+ .addImm(value & 0xffff)
+ .addReg(Reg, RegState::ImplicitKill)
+ .addReg(Reg, RegState::ImplicitDefine);
+}
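+// Illustrative example: loadConstant(MBB, I, DL, BF::P1, 0x12345678) takes the
+// split path above, loading 0x1234 into p1.h and 0x5678 into p1.l with two
+// LOAD16i instructions; values fitting in 7 or 16 bits use the
+// single-instruction forms instead.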
+
+void BlackfinRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (Amount != 0) {
+ assert(Amount%4 == 0 && "Unaligned call frame size");
+ if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
+ } else {
+ assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
+ "Unknown call frame pseudo instruction");
+ adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
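+// Illustrative example: an ADJCALLSTACKDOWN of 16 bytes fits in 7 bits and is
+// lowered by adjustRegister to a single "sp += -16;"; larger adjustments go
+// through the P1 scratch register as shown above.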
+
+/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+void
+BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned FIPos;
+ for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
+ assert(FIPos < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ int FrameIndex = MI.getOperand(FIPos).getIndex();
+ assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
+ + MI.getOperand(FIPos+1).getImm();
+ unsigned BaseReg = BF::FP;
+ if (TFI->hasFP(MF)) {
+ assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
+ } else {
+ BaseReg = BF::SP;
+ Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
+ }
+
+ bool isStore = false;
+
+ switch (MI.getOpcode()) {
+ case BF::STORE32fi:
+ isStore = true;
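+ // FALLTHROUGH: shares the i32 stack-slot handling below.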
+ case BF::LOAD32fi: {
+ assert(Offset%4 == 0 && "Unaligned i32 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ if (isUInt<6>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_uimm6m4
+ : BF::LOAD32p_uimm6m4));
+ return;
+ }
+ if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32fp_nimm7m4
+ : BF::LOAD32fp_nimm7m4));
+ MI.getOperand(FIPos+1).setImm(-Offset);
+ return;
+ }
+ if (isInt<18>(Offset)) {
+ MI.setDesc(TII.get(isStore
+ ? BF::STORE32p_imm18m4
+ : BF::LOAD32p_imm18m4));
+ return;
+ }
+ // TODO: use the RegScavenger to materialize offsets that do not fit above.
+ MI.dump();
+ llvm_unreachable("Stack frame offset too big");
+ break;
+ }
+ case BF::ADDpp: {
+ assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ // We need to produce a stack offset in a P register. We emit:
+ // P0 = offset;
+ // P0 = BR + P0;
+ assert(FIPos==1 && "Bad frame index operand");
+ loadConstant(MBB, II, DL, DestReg, Offset);
+ MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(2).ChangeToRegister(BaseReg, false);
+ break;
+ }
+ case BF::STORE16fi:
+ isStore = true;
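+ // FALLTHROUGH: shares the i16 stack-slot handling below.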
+ case BF::LOAD16fi: {
+ assert(Offset%2 == 0 && "Unaligned i16 stack access");
+ assert(FIPos==1 && "Bad frame index operand");
+ // We need a P register to use as an address
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ loadConstant(MBB, II, DL, ScratchReg, Offset);
+ BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(BaseReg);
+ MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
+ MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
+ MI.RemoveOperand(2);
+ break;
+ }
+ case BF::STORE8fi: {
+ // This is an AnyCC spill; we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ if (SpillReg.getReg()==BF::NCC) {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
+ .addOperand(SpillReg);
+ BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
+ .addReg(ScratchReg).addImm(0);
+ } else {
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
+ .addOperand(SpillReg);
+ }
+ // STORE D
+ MI.setDesc(TII.get(BF::STORE8p_imm16));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ break;
+ }
+ case BF::LOAD8fi: {
+ // This is an AnyCC restore; we need a scratch register.
+ assert(FIPos==1 && "Bad frame index operand");
+ MachineOperand SpillReg = MI.getOperand(0);
+ unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+ assert(ScratchReg && "Could not scavenge register");
+ MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
+ MI.getOperand(0).ChangeToRegister(ScratchReg, true);
+ MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+ MI.getOperand(FIPos+1).setImm(Offset);
+ ++II;
+ if (SpillReg.getReg()==BF::CC) {
+ // CC = D
+ BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
+ .addReg(ScratchReg, RegState::Kill);
+ } else {
+ // Restore NCC (CC = D==0)
+ BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Cannot eliminate frame index");
+ break;
+ }
+}
+
+unsigned BlackfinRegisterInfo::getRARegister() const {
+ return BF::RETS;
+}
+
+unsigned
+BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? BF::FP : BF::SP;
+}
+
+unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
+ bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
new file mode 100644
index 000000000000..86f45c17c625
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -0,0 +1,81 @@
+//===- BlackfinRegisterInfo.h - Blackfin Register Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINREGISTERINFO_H
+#define BLACKFINREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "BlackfinGenRegisterInfo.inc"
+
+namespace llvm {
+
+ class BlackfinSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
+ BlackfinSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ // getSubReg implemented by tablegen
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
+ return &BF::PRegClass;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getRARegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // Utility functions
+ void adjustRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ unsigned ScratchReg,
+ int delta) const;
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL,
+ unsigned Reg,
+ int value) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
new file mode 100644
index 000000000000..1c42205eb780
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -0,0 +1,277 @@
+//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Blackfin register file
+//===----------------------------------------------------------------------===//
+
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
+
+// Registers are identified with 3-bit group and 3-bit ID numbers.
+class BlackfinReg<string n> : Register<n> {
+ field bits<3> Group;
+ field bits<3> Num;
+ let Namespace = "BF";
+}
+
+// Rc - 1-bit registers
+class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
+ field bits<5> BitNum = bitno;
+}
+
+// Rs - 16-bit integer registers
+class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+ field bits<1> High = hi;
+}
+
+// Ri - 32-bit integer registers (no subregs)
+class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
+ let Group = group;
+ let Num = num;
+}
+
+// Ra - 40-bit accumulator registers
+class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi32, lo32];
+ let Group = 4;
+ let Num = num;
+}
+
+// Two halves of 32-bit register
+multiclass Rss<bits<3> group, bits<3> num, string n> {
+ def H : Rs<group, num, 1, !strconcat(n, ".h")>;
+ def L : Rs<group, num, 0, !strconcat(n, ".l")>;
+}
+
+// Rii - 32-bit integer registers with subregs
+class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
+ : BlackfinReg<n> {
+ let SubRegs = subs;
+ let SubRegIndices = [hi16, lo16];
+ let Group = group;
+ let Num = num;
+}
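+// For example, R0 below is an Rii whose SubRegs are [R0H, R0L] under the
+// [hi16, lo16] indices, which is what the EXTRACT_SUBREG/INSERT_SUBREG uses of
+// lo16 in BlackfinInstrInfo.td rely on.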
+
+// Status bits are all part of ASTAT
+def AZ : Rc<0, "az">;
+def AN : Rc<1, "an">;
+def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
+def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
+def AQ : Rc<6, "aq">;
+def AC0 : Rc<12, "ac0">;
+def AC1 : Rc<13, "ac1">;
+def AV0 : Rc<16, "av0">;
+def AV0S : Rc<17, "av0s">;
+def AV1 : Rc<18, "av1">;
+def AV1S : Rc<19, "av1s">;
+def V : Rc<24, "v">;
+def VS : Rc<25, "vs">;
+// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
+
+// Group 0: Integer registers
+defm R0 : Rss<0, 0, "r0">;
+def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
+defm R1 : Rss<0, 1, "r1">;
+def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
+defm R2 : Rss<0, 2, "r2">;
+def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
+defm R3 : Rss<0, 3, "r3">;
+def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
+defm R4 : Rss<0, 4, "r4">;
+def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
+defm R5 : Rss<0, 5, "r5">;
+def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
+defm R6 : Rss<0, 6, "r6">;
+def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
+defm R7 : Rss<0, 7, "r7">;
+def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
+
+// Group 1: Pointer registers
+defm P0 : Rss<1, 0, "p0">;
+def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
+defm P1 : Rss<1, 1, "p1">;
+def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
+defm P2 : Rss<1, 2, "p2">;
+def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
+defm P3 : Rss<1, 3, "p3">;
+def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
+defm P4 : Rss<1, 4, "p4">;
+def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
+defm P5 : Rss<1, 5, "p5">;
+def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
+defm SP : Rss<1, 6, "sp">;
+def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
+defm FP : Rss<1, 7, "fp">;
+def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
+
+// Group 2: Index registers
+defm I0 : Rss<2, 0, "i0">;
+def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
+defm I1 : Rss<2, 1, "i1">;
+def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
+defm I2 : Rss<2, 2, "i2">;
+def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
+defm I3 : Rss<2, 3, "i3">;
+def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
+defm M0 : Rss<2, 4, "m0">;
+def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
+defm M1 : Rss<2, 5, "m1">;
+def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
+defm M2 : Rss<2, 6, "m2">;
+def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
+defm M3 : Rss<2, 7, "m3">;
+def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
+
+// Group 3: Cyclic indexing registers
+defm B0 : Rss<3, 0, "b0">;
+def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
+defm B1 : Rss<3, 1, "b1">;
+def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
+defm B2 : Rss<3, 2, "b2">;
+def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
+defm B3 : Rss<3, 3, "b3">;
+def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
+defm L0 : Rss<3, 4, "l0">;
+def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
+defm L1 : Rss<3, 5, "l1">;
+def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
+defm L2 : Rss<3, 6, "l2">;
+def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
+defm L3 : Rss<3, 7, "l3">;
+def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
+
+// Accumulators
+def A0X : Ri <4, 0, "a0.x">;
+defm A0 : Rss<4, 1, "a0">;
+def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
+def A0 : Ra <0, "a0", [A0X, A0W]>;
+
+def A1X : Ri <4, 2, "a1.x">;
+defm A1 : Rss<4, 3, "a1">;
+def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
+def A1 : Ra <2, "a1", [A1X, A1W]>;
+
+def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
+def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
+def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
+def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
+def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
+
+def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
+ let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+}
+
+def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
+def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
+def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
+def SYSCFG : Ri<7, 2, "syscfg">;
+def CYCLES : Ri<6, 6, "cycles">;
+def CYCLES2 : Ri<6, 7, "cycles2">;
+
+// Hardware loops
+def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
+def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
+def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
+def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
+def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
+def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
+
+// Register classes.
+def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
+
+def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
+
+def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
+
+def P16L : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uL", 0, 5), SPL, FPL)>;
+
+def P16H : RegisterClass<"BF", [i16], 16,
+ (add (sequence "P%uH", 0, 5), SPH, FPH)>;
+
+def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
+
+def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
+
+def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
+
+def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
+
+def GR16 : RegisterClass<"BF", [i16], 16,
+ (add DP16,
+ I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
+ M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
+ B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
+ L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
+
+def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
+ let SubRegClasses = [(D16L lo16), (D16H hi16)];
+}
+
+def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
+ let SubRegClasses = [(P16L lo16), (P16H hi16)];
+}
+
+def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
+ let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
+}
+
+def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
+def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
+def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
+def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
+
+def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
+
+def ALL : RegisterClass<"BF", [i32], 32,
+ (add GR,
+ A0X, A0W, A1X, A1W, ASTAT, RETS,
+ LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
+ USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
+
+def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
+
+// We are going to pretend that CC and !CC are 32-bit registers, even though
+// they can only hold 1 bit.
+let CopyCost = -1, Size = 8 in {
+def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
+def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
+def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
+def StatBit : RegisterClass<"BF", [i1], 8,
+ (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
+}
+
+// Should be i40, but that isn't defined. It is not a legal type yet anyway.
+def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
+
+// Register classes to match inline asm constraints.
+def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
+def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
+def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
+def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
+ B0, B1, B2, B3,
+ L0, L1, L2, L3)>;
+def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
+def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
+def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
+def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
+ RETE, ASTAT, SEQSTAT,
+ USP)>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a21f696a62eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -0,0 +1,24 @@
+//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BlackfinSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "blackfin-selectiondag-info"
+#include "BlackfinTargetMachine.h"
+using namespace llvm;
+
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+ const BlackfinTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
new file mode 100644
index 000000000000..f1ce3482f90f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINSELECTIONDAGINFO_H
+#define BLACKFINSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class BlackfinTargetMachine;
+
+class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
+ ~BlackfinSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
new file mode 100644
index 000000000000..ec919cdf0b90
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- BlackfinSubtarget.cpp - Blackfin Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Blackfin-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BlackfinGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
+ icplb(false),
+ wa_mi_shift(false),
+ wa_csync(false),
+ wa_specld(false),
+ wa_mmr_stall(false),
+ wa_lcregs(false),
+ wa_hwloop(false),
+ wa_ind_call(false),
+ wa_killed_mmr(false),
+ wa_rets(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
new file mode 100644
index 000000000000..1a01a81116d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -0,0 +1,49 @@
+//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_SUBTARGET_H
+#define BLACKFIN_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "BlackfinGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
+ bool sdram;
+ bool icplb;
+ bool wa_mi_shift;
+ bool wa_csync;
+ bool wa_specld;
+ bool wa_mmr_stall;
+ bool wa_lcregs;
+ bool wa_hwloop;
+ bool wa_ind_call;
+ bool wa_killed_mmr;
+ bool wa_rets;
+ public:
+ BlackfinSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the specified
+ /// subtarget options. The definition of this function is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
new file mode 100644
index 000000000000..a1c9f1c05e0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -0,0 +1,41 @@
+//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinTargetMachine.h"
+#include "Blackfin.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeBlackfinTarget() {
+ RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
+}
+
+BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout("e-p:32:32-i64:32-f64:32-n32"),
+ Subtarget(TT, CPU, FS),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createBlackfinISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
new file mode 100644
index 000000000000..bd7dc84f04ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -0,0 +1,67 @@
+//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETMACHINE_H
+#define BLACKFINTARGETMACHINE_H
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinIntrinsicInfo.h"
+#include "BlackfinISelLowering.h"
+#include "BlackfinFrameLowering.h"
+#include "BlackfinSubtarget.h"
+#include "BlackfinSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+ class BlackfinTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout;
+ BlackfinSubtarget Subtarget;
+ BlackfinTargetLowering TLInfo;
+ BlackfinSelectionDAGInfo TSInfo;
+ BlackfinInstrInfo InstrInfo;
+ BlackfinFrameLowering FrameLowering;
+ BlackfinIntrinsicInfo IntrinsicInfo;
+ public:
+ BlackfinTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const BlackfinSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const BlackfinRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const BlackfinTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ const TargetIntrinsicInfo *getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
new file mode 100644
index 000000000000..5b9d4a29794e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the BlackfinMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCAsmInfo.h"
+
+using namespace llvm;
+
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
+ GlobalPrefix = "_";
+ CommentString = "//";
+ HasSetDirective = false;
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
new file mode 100644
index 000000000000..c372aa247e04
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BlackfinMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETASMINFO_H
+#define BLACKFINTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct BlackfinMCAsmInfo : public MCAsmInfo {
+ explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
new file mode 100644
index 000000000000..0fa1471ae3e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCTargetDesc.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BlackfinGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createBlackfinMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitBlackfinMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+ RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+}
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
new file mode 100644
index 000000000000..5bffe94fc582
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINMCTARGETDESC_H
+#define BLACKFINMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheBlackfinTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Blackfin registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "BlackfinGenRegisterInfo.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#define GET_INSTRINFO_ENUM
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BlackfinGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..8cd924f9236f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMBlackfinDesc
+ BlackfinMCTargetDesc.cpp
+ BlackfinMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..6b26101f4473
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Blackfin/MCTargetDesc/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
new file mode 100644
index 000000000000..402e0afde81d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Blackfin.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheBlackfinTarget;
+
+extern "C" void LLVMInitializeBlackfinTargetInfo() {
+ RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
+ "Analog Devices Blackfin [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 000000000000..415beb1dd1cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,3611 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+// Some MS headers define setjmp as _setjmp; undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
+using namespace llvm;
+
+extern "C" void LLVMInitializeCBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
+}
+
+extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
+
+namespace {
+ class CBEMCAsmInfo : public MCAsmInfo {
+ public:
+ CBEMCAsmInfo() {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = "";
+ }
+ };
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ formatted_raw_ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const MCAsmInfo* TAsm;
+ MCContext *TCtx;
+ const TargetData* TD;
+
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+ std::set<const Argument*> ByValParams;
+ unsigned FPCounter;
+ unsigned OpaqueCounter;
+ DenseMap<const Value*, unsigned> AnonValueNumbers;
+ unsigned NextAnonValueNumber;
+
+ /// UnnamedStructIDs - This contains a unique ID for each struct that is
+ /// either anonymous or has no name.
+ DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+
+ public:
+ static char ID;
+ explicit CWriter(formatted_raw_ostream &o)
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
+ NextAnonValueNumber(0) {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ FPCounter = 0;
+ }
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+      // Do not codegen any 'available_externally' functions at all; they have
+      // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete IL;
+ delete TD;
+ delete Mang;
+ delete TCtx;
+ delete TAsm;
+ FPConstantMap.clear();
+ ByValParams.clear();
+ intrinsicPrototypesAlreadyGenerated.clear();
+ UnnamedStructIDs.clear();
+ return false;
+ }
+
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *Ty);
+
+ std::string getStructName(const StructType *ST);
+
+ /// writeOperandDeref - Print the result of dereferencing the specified
+ /// operand with '*'. This is equivalent to printing '*' then using
+ /// writeOperand, but avoids excess syntax in some cases.
+ void writeOperandDeref(Value *Operand) {
+ if (isAddressExposed(Operand)) {
+ // Already something with an address exposed.
+ writeOperandInternal(Operand);
+ } else {
+ Out << "*(";
+ writeOperand(Operand);
+ Out << ")";
+ }
+ }
+
+ void writeOperand(Value *Operand, bool Static = false);
+ void writeInstComputationInline(Instruction &I);
+ void writeOperandInternal(Value *Operand, bool Static = false);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, const ICmpInst &I);
+ bool writeInstructionCast(const Instruction &I);
+
+ void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment);
+
+ private :
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+ /// Prints the definition of the intrinsic function F. Supports the
+ /// intrinsics which need to be explicitly defined in the CBackend.
+ void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
+
+ void printModuleTypes();
+ void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
+ void printFloatingPointConstants(Function &F);
+ void printFloatingPointConstants(const Constant *C);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printConstant(Constant *CPV, bool Static);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE, bool Static);
+ void printConstantArray(ConstantArray *CPA, bool Static);
+ void printConstantVector(ConstantVector *CV, bool Static);
+
+ /// isAddressExposed - Return true if the specified value's name needs to
+ /// have its address taken in order to get a C value of the correct type.
+ /// This happens for global variables, byval parameters, and direct allocas.
+ bool isAddressExposed(const Value *V) const {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return ByValParams.count(A);
+ return isa<GlobalVariable>(V) || isDirectAlloca(V);
+ }
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
+ isa<InsertValueInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm, extractelement, or shufflevector.
+ if (I.hasOneUse()) {
+ const Instruction &User = cast<Instruction>(*I.use_back());
+ if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
+ isa<ShuffleVectorInst>(User))
+ return false;
+ }
+
+      // Only inline an instruction if its use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
+ static bool isInlineAsm(const Instruction& I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ return isa<InlineAsm>(CI->getCalledValue());
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+
+ void visitUnwindInst(UnwindInst &I) {
+ llvm_unreachable("Lowerinvoke pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+
+ void visitAllocaInst(AllocaInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+
+ void visitInstruction(Instruction &I) {
+#ifndef NDEBUG
+ errs() << "C Writer does not know about " << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+
+
+static std::string CBEMangle(const std::string &S) {
+ std::string Result;
+
+ for (unsigned i = 0, e = S.size(); i != e; ++i)
+ if (isalnum(S[i]) || S[i] == '_') {
+ Result += S[i];
+ } else {
+ Result += '_';
+ Result += 'A'+(S[i]&15);
+ Result += 'A'+((S[i]>>4)&15);
+ Result += '_';
+ }
+ return Result;
+}
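CBEMangle keeps C identifier characters and expands every other byte into a four-character escape: '_', the low nibble offset from 'A', the high nibble likewise, then a closing '_'. A minimal standalone sketch of the same scheme (the DemoMangle helper and the driver are made up for illustration; they are not part of the backend):

#include <cctype>
#include <iostream>
#include <string>

// Same scheme as CBEMangle above: keep [A-Za-z0-9_]; encode anything else as
// '_' + (low nibble as 'A'..'P') + (high nibble as 'A'..'P') + '_'.
static std::string DemoMangle(const std::string &S) {
  std::string R;
  for (unsigned char C : S) {
    if (std::isalnum(C) || C == '_') {
      R += C;
    } else {
      R += '_';
      R += static_cast<char>('A' + (C & 15));
      R += static_cast<char>('A' + ((C >> 4) & 15));
      R += '_';
    }
  }
  return R;
}

int main() {
  // '.' is 0x2E: low nibble 14 -> 'O', high nibble 2 -> 'C'.
  std::cout << DemoMangle("foo.bar") << "\n"; // prints "foo_OC_bar"
}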
+
+std::string CWriter::getStructName(const StructType *ST) {
+ if (!ST->isAnonymous() && !ST->getName().empty())
+ return CBEMangle("l_"+ST->getName().str());
+
+ return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
+}
+
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type: instead of printing the type as void (*)(Struct*, ...), it is
+/// printed as "Struct (*)(...)" for struct return functions.
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *TheTy) {
+ const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ const Type *RetTy = cast<PointerType>(*I)->getElementType();
+ unsigned Idx = 1;
+ for (++I, ++Idx; I != E; ++I, ++Idx) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ PrintedType = true;
+ }
+ if (FTy->isVarArg()) {
+ if (!PrintedType)
+      FunctionInnards << " int"; // dummy argument for empty vararg functions
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+}
+
+raw_ostream &
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::X86_MMXTyID:
+ return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+ " __attribute__((vector_size(64))) " + NameSoFar);
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Given a Type* and a variable name, this prints out the variable
+// declaration.
+//
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (!FTy->getNumParams())
+        FunctionInnards << " int"; // dummy argument for empty vararg functions
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+
+ // Check to see if the type is named.
+ if (!IgnoreName)
+ return Out << getStructName(STy) << ' ' << NameSoFar;
+
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (PTy->getElementType()->isArrayTy() ||
+ PTy->getElementType()->isVectorTy())
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ default:
+ llvm_unreachable("Unhandled case in getTypeProps!");
+ }
+
+ return Out;
+}
+
+void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
+
+  // As a special case, print the array as a string if it is an array of
+  // i8 values (which covers the old 'ubyte' and 'sbyte' cases).
+  //
+  const Type *ETy = CPA->getType()->getElementType();
+  bool isString = ETy == Type::getInt8Ty(CPA->getContext());
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape.
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << (char)C;
+ else
+ Out << (char)C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)), Static);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+ bool ignored;
+ // Do long doubles in hex for now.
+ if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
+ CFP->getType() != Type::getDoubleTy(CFP->getContext()))
+ return false;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", APF.convertToDouble());
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
+ return false;
+#else
+ std::string StrVal = ftostr(APF);
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like "Inf"
+ // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
+ return false;
+#endif
+}
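The round-trip test above can be mimicked outside LLVM: print the value, parse it back, and accept the constant only if the bits match. A small hypothetical sketch of that idea (SafeToPrint and the plain decimal "%e" format are illustrative stand-ins for the APFloat/ftostr path):

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Print V in decimal, reparse it, and compare bit patterns for exact equality,
// mirroring the idea behind isFPCSafeToPrint.
static bool SafeToPrint(double V) {
  char Buf[64];
  std::snprintf(Buf, sizeof(Buf), "%e", V); // 6 digits after the decimal point
  double Back = std::strtod(Buf, 0);
  return std::memcmp(&Back, &V, sizeof(double)) == 0;
}

int main() {
  // 1.5 survives the round trip; 1.0/3.0 loses bits at this precision.
  std::printf("%d %d\n", SafeToPrint(1.5), SafeToPrint(1.0 / 3.0)); // "1 0"
}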
+
+/// Print out the casting for a cast operation. This emits the double cast
+/// needed for conversion to the destination type, where one is required.
+/// @brief Print a cast
+void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ break;
+ }
+}
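The reason printCast emits a second, source-side cast is easiest to see in the C it produces. A hand-written analogue for an i8 value widened to i32 (the variable names are made up for illustration; this is not verbatim backend output):

#include <cstdio>

int main() {
  signed char x = -1;
  // zext i8 -> i32: unsigned destination cast plus unsigned source cast,
  // so the high bits are zero-filled and the value becomes 255.
  unsigned int z = (unsigned int)(unsigned char)x;
  // sext i8 -> i32: both casts are signed, so the sign bit is propagated.
  signed int s = (signed int)(signed char)x;
  std::printf("%u %d\n", z, s); // prints "255 -1"
}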
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV, bool Static) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0), Static);
+ if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(";
+ printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV), Static);
+ Out << ")";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0), Static);
+ Out << '?';
+ printConstant(CE->getOperand(1), Static);
+ Out << ':';
+ printConstant(CE->getOperand(2), Static);
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: llvm_unreachable("Illegal ICmp predicate");
+ }
+ break;
+ default: llvm_unreachable("Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+#ifndef NDEBUG
+ errs() << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/";
+ if (!CPV->getType()->isVectorTy()) {
+ Out << "0)";
+ } else {
+ Out << "{})";
+ }
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ const Type* Ty = CI->getType();
+ if (Ty == Type::getInt1Ty(CPV->getContext()))
+ Out << (CI->getZExtValue() ? '1' : '0');
+ else if (Ty == Type::getInt32Ty(CPV->getContext()))
+ Out << CI->getZExtValue() << 'u';
+ else if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << CI->getZExtValue() << "ull";
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+ "float" :
+ FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+ "double" :
+ "long double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ double V;
+ if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToFloat();
+ else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
+ V = FPC->getValueAPF().convertToDouble();
+ else {
+ // Long double. Convert the number to double, discarding precision.
+ // This is not awesome, but it at least makes the CBE output somewhat
+ // useful.
+ APFloat Tmp = FPC->getValueAPF();
+ bool LosesInfo;
+ Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
+ V = Tmp.convertToDouble();
+ }
+
+ if (IsNAN(V)) {
+ // The value is NaN
+
+ // FIXME the actual NaN bits should be emitted.
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(V);
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(V)) {
+ // The value is Inf
+ if (V < 0) Out << '-';
+ Out << "LLVM_INF" <<
+ (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", V);
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValueAPF());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ Out << "{ "; // Arrays are wrapped in struct types.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+ printConstantArray(CA, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ }
+ Out << " }";
+ }
+ Out << " }"; // Arrays are wrapped in struct types.
+ break;
+
+ case Type::VectorTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+ printConstantVector(CV, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const VectorType *VT = cast<VectorType>(CPV->getType());
+ Out << "{ ";
+ Constant *CZ = Constant::getNullValue(VT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::StructTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV, Static);
+ break;
+ }
+ // FALL THROUGH
+ default:
+#ifndef NDEBUG
+ errs() << "Unknown constant type: " << *CPV << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+}
+
+// Some constant expressions need to be casted back to the original types
+// because their operands were casted to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
+bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
+ bool NeedsExplicitCast = false;
+ const Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true so it gets
+ // casted below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+ // Write out the casted constant if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV, false);
+ Out << ")";
+ } else
+ printConstant(CPV, false);
+}
+
+std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
+ // Mangle globals with the standard mangler interface for LLC compatibility.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
+ SmallString<128> Str;
+ Mang->getNameWithPrefix(Str, GV, false);
+ return CBEMangle(Str.str().str());
+ }
+
+ std::string Name = Operand->getName();
+
+ if (Name.empty()) { // Assign unique names to local temporaries.
+ unsigned &No = AnonValueNumbers[Operand];
+ if (No == 0)
+ No = ++NextAnonValueNumber;
+ Name = "tmp__" + utostr(No);
+ }
+
+ std::string VarName;
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+      char buffer[5];
+      sprintf(buffer, "_%x_", (unsigned char)ch); // avoid sign-extending ch
+ VarName += buffer;
+ } else
+ VarName += ch;
+ }
+
+ return "llvm_cbe_" + VarName;
+}
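GetValueName prefixes local names with "llvm_cbe_" and hex-escapes every character that is not legal in a C identifier (globals instead go through the Mangler, as above). A self-contained sketch of that sanitisation (the DemoValueName helper is hypothetical):

#include <cstdio>
#include <string>

// Mirrors the loop above: identifier characters pass through, anything else
// becomes "_<hex>_", and the result gets the "llvm_cbe_" prefix.
static std::string DemoValueName(const std::string &Name) {
  std::string Var;
  for (char ch : Name) {
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
        (ch >= '0' && ch <= '9') || ch == '_') {
      Var += ch;
    } else {
      char buf[8];
      std::snprintf(buf, sizeof(buf), "_%x_", (unsigned char)ch);
      Var += buf;
    }
  }
  return "llvm_cbe_" + Var;
}

int main() {
  std::printf("%s\n", DemoValueName("a.b").c_str()); // prints "llvm_cbe_a_2e_b"
}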
+
+/// writeInstComputationInline - Emit the computation for the specified
+/// instruction inline, with no destination provided.
+void CWriter::writeInstComputationInline(Instruction &I) {
+ // We can't currently support integer types other than 1, 8, 16, 32, 64.
+ // Validate this.
+ const Type *Ty = I.getType();
+ if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+ Ty!=Type::getInt8Ty(I.getContext()) &&
+ Ty!=Type::getInt16Ty(I.getContext()) &&
+ Ty!=Type::getInt32Ty(I.getContext()) &&
+ Ty!=Type::getInt64Ty(I.getContext()))) {
+ report_fatal_error("The C backend does not currently support integer "
+ "types of widths other than 1, 8, 16, 32, 64.\n"
+ "This is being tracked as PR 4158.");
+ }
+
+ // If this is a non-trivial bool computation, make sure to truncate down to
+ // a 1 bit value. This is important because we want "add i1 x, y" to return
+ // "0" when x and y are true, not "2" for example.
+ bool NeedBoolTrunc = false;
+ if (I.getType() == Type::getInt1Ty(I.getContext()) &&
+ !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ NeedBoolTrunc = true;
+
+ if (NeedBoolTrunc)
+ Out << "((";
+
+ visit(I);
+
+ if (NeedBoolTrunc)
+ Out << ")&1)";
+}
+
+
+void CWriter::writeOperandInternal(Value *Operand, bool Static) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ // Should we inline this instruction to build a tree?
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ Out << '(';
+ writeInstComputationInline(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV, Static);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperand(Value *Operand, bool Static) {
+ bool isAddressImplicit = isAddressExposed(Operand);
+ if (isAddressImplicit)
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand, Static);
+
+ if (isAddressImplicit)
+ Out << ')';
+}
+
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ const Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::GetElementPtr:
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
+void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
+  // This has to do a cast to ensure the operand has the right signedness.
+  // Also, if the operand is a pointer, we make sure to cast it to an integer
+  // when doing the comparison, both for signedness and so that the C compiler
+  // doesn't optimize things like "p < NULL" to false (p may, for example,
+  // hold an integer value).
+ bool shouldCast = Cmp.isRelational();
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (!shouldCast) {
+ writeOperand(Operand);
+ return;
+ }
+
+ // Should this be a signed comparison? If so, convert to signed.
+ bool castIsSigned = Cmp.isSigned();
+
+ // If the operand was a pointer, convert to a large integer type.
+ const Type* OpTy = Operand->getType();
+ if (OpTy->isPointerTy())
+ OpTy = TD->getIntPtrType(Operand->getContext());
+
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+}
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
+ const TargetData *TD) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+ Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output typedefs for 128-bit integers. If these are needed with a
+ // 32-bit target or with a C compiler that doesn't support mode(TI),
+ // more drastic measures will be needed.
+ Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
+ << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
+ << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+ // Otherwise, if it is other metadata, don't print it. This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const char *Str, unsigned Length,
+ raw_ostream &Out) {
+ for (unsigned i = 0; i != Length; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else if (C == '\\')
+ Out << "\\\\";
+ else if (C == '\"')
+ Out << "\\\"";
+ else if (C == '\t')
+ Out << "\\t";
+ else
+ Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ PrintEscapedString(Str.c_str(), Str.size(), Out);
+}
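+
+// For illustration, escaping derived purely from the routine above: the bytes
+// {'a', '\t', '"', 0x01} are printed as
+//   a\t\"\x01
+// so that the result can be re-emitted inside a quoted C string literal.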
+
+bool CWriter::doInitialization(Module &M) {
+ FunctionPass::doInitialization(M);
+
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+#if 0
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TAsm = Match->createMCAsmInfo(Triple);
+#endif
+ TAsm = new CBEMCAsmInfo();
+ TCtx = new MCContext(*TAsm, NULL);
+ Mang = new Mangler(*TCtx, *TD);
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+  // Emit the standard includes and the compiler-specific glue (alloca, etc.).
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ Out << "#include <limits.h>\n"; // With overflow intrinsics support.
+ generateCompilerSpecificCode(Out, TD);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+ << "typedef struct { unsigned long long f1; unsigned short f2; "
+ "unsigned short pad[3]; } ConstantFP80Ty;\n"
+ // This is used for both kinds of 128-bit long double; meaning differs.
+ << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+ " ConstantFP128Ty;\n"
+ << "\n\n/* Global Declarations */\n";
+
+ // First output all the declarations for the program, because C requires
+ // Functions & globals to be declared before they are used.
+ //
+ if (!M.getModuleInlineAsm().empty()) {
+ Out << "/* Module asm statements */\n"
+ << "asm(";
+
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M.getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\\n\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ Out << "\"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
+ Out << "\");\n"
+ << "/* End Module asm statements */\n";
+ }
+
+ // Loop over the symbol table, emitting all named constants.
+ printModuleTypes();
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ I->hasCommonLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+ Out << "long double fmodl(long double, long double);\n";
+
+ // Store the intrinsics which will be declared/defined below.
+ SmallVector<const Function*, 8> intrinsicsToDefine;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ // Store the used intrinsics, which need to be explicitly defined.
+ if (I->isIntrinsic()) {
+ switch (I->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ intrinsicsToDefine.push_back(I);
+ break;
+ }
+ continue;
+ }
+
+ if (I->getName() == "setjmp" ||
+ I->getName() == "longjmp" || I->getName() == "_setjmp")
+ continue;
+
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+
+ Out << ";\n";
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasCommonLinkage()) // FIXME is this right?
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasCommonLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ // FIXME common linkage should avoid this problem.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer(), true);
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (I->getInitializer()->getType()->isStructTy() ||
+ I->getInitializer()->getType()->isVectorTy()) {
+ Out << "{ 0 }";
+ } else if (I->getInitializer()->getType()->isArrayTy()) {
+ // As with structs and vectors, but with an extra set of braces
+ // because arrays are wrapped in structs.
+ Out << "{ { 0 } }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer(), true);
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+ // Emit some helper functions for dealing with FCMP instruction's
+ // predicates
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+
+ // Emit definitions of the intrinsics.
+ for (SmallVector<const Function*, 8>::const_iterator
+ I = intrinsicsToDefine.begin(),
+ E = intrinsicsToDefine.end(); I != E; ++I) {
+ printIntrinsicDefinition(**I, Out);
+ }
+
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+  // Scan the function for floating point constants.  If any FP constant is
+  // used in the function, we want to redirect it here so that we do not
+  // depend on the precision of the printed form, unless the printed form
+  // preserves precision.
+  //
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ printFloatingPointConstants(*I);
+
+ Out << '\n';
+}
+
+void CWriter::printFloatingPointConstants(const Constant *C) {
+ // If this is a constant expression, recursively check for constant fp values.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ printFloatingPointConstants(CE->getOperand(i));
+ return;
+ }
+
+ // Otherwise, check for a FP constant that we need to print.
+ const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
+ if (FPC == 0 ||
+ // Do not put in FPConstantMap if safe.
+ isFPCSafeToPrint(FPC) ||
+ // Already printed this constant?
+ FPConstantMap.count(FPC))
+ return;
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
+ double Val = FPC->getValueAPF().convertToDouble();
+ uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
+ float Val = FPC->getValueAPF().convertToFloat();
+ uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "U; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
+ // api needed to prevent premature destruction
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
+ << " = { 0x" << utohexstr(p[0])
+ << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+ << "}; /* Long double constant */\n";
+ } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
+ FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
+ << " = { 0x"
+ << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+ << "}; /* Long double constant */\n";
+
+ } else {
+ llvm_unreachable("Unknown float type!");
+ }
+}
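+
+// For illustration, a double constant such as 0.1, whose decimal form does not
+// round-trip exactly, is expected to be emitted by the code above roughly as
+//   static const ConstantDoubleTy FPConstant0 = 0x3FB999999999999AULL; /* 0.1 */
+// and later uses of the constant are redirected through that variable.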
+
+
+/// printModuleTypes - Emit a typedef for each struct type used in the module,
+/// then print out the struct definitions themselves.
+///
+void CWriter::printModuleTypes() {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // Get all of the struct types used in the module.
+ std::vector<StructType*> StructTypes;
+ TheModule->findUsedStructTypes(StructTypes);
+
+ if (StructTypes.empty()) return;
+
+ Out << "/* Structure forward decls */\n";
+
+ unsigned NextTypeID = 0;
+
+ // If any of them are missing names, add a unique ID to UnnamedStructIDs.
+ // Print out forward declarations for structure types.
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *ST = StructTypes[i];
+
+ if (ST->isAnonymous() || ST->getName().empty())
+ UnnamedStructIDs[ST] = NextTypeID++;
+
+ std::string Name = getStructName(ST);
+
+ Out << "typedef struct " << Name << ' ' << Name << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far.
+ SmallPtrSet<const Type *, 16> StructPrinted;
+
+ // Loop over all structures then push them into the stack so they are
+ // printed in the correct order.
+ //
+ Out << "/* Structure contents */\n";
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
+ if (StructTypes[i]->isStructTy())
+ // Only print out used types!
+ printContainedStructs(StructTypes[i], StructPrinted);
+}
+
+// Recursively print all of the struct types that the given type depends on,
+// then print the struct type itself (each type is printed exactly once).
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(const Type *Ty,
+ SmallPtrSet<const Type *, 16> &StructPrinted) {
+ // Don't walk through pointers.
+ if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (!StructPrinted.insert(Ty)) return;
+
+ // Print structure type out.
+ printType(Out, ST, false, getStructName(ST), true);
+ Out << ";\n\n";
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->hasStructRetAttr();
+
+ if (F->hasLocalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__attribute__((stdcall)) ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__attribute__((fastcall)) ";
+ break;
+ case CallingConv::X86_ThisCall:
+ Out << "__attribute__((thiscall)) ";
+ break;
+ default:
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const AttrListPtr &PAL = F->getAttributes();
+
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ std::string ArgName;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ const Type *ArgTy = I->getType();
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ ByValParams.insert(I);
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+ if (!PrintedArg && FT->isVarArg()) {
+ FunctionInnards << "int vararg_dummy_arg";
+ PrintedArg = true;
+ }
+
+ // Finish printing arguments... if this is a vararg function, print the ...,
+ // unless there are no known types, in which case, we just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+ FunctionInnards << ",..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+  // Get the return type for the function.
+ const Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
+ FunctionInnards.str());
+}
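+
+// Rough sketch of the result (type rendering and the "llvm_cbe_" argument name
+// are the usual CBE conventions, shown here only for illustration): a plain
+// "i32 @foo(i32 %x)" with no attributes or special calling convention comes
+// out as
+//   unsigned int foo(unsigned int llvm_cbe_x)
+// with integer types printed unsigned unless the parameter carries SExt.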
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
+ (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.hasStructRetAttr();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+ // If this is a struct return function, handle the result with magic.
+ if (isStructReturn) {
+ const Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
+ !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
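+
+// Illustrative shape of the emitted code: each natural loop becomes a
+// syntactic loop of the form
+//   do {     /* Syntactic loop 'bb' to make GCC happy */
+//     ... the loop's blocks, nested loops printed recursively ...
+//   } while (1); /* end of syntactic loop 'bb' */
+// and control leaves it through explicit gotos printed by the branch code.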
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+ // Don't print the label for the basic block if there are no uses, or if
+ // the only terminator use is the predecessor basic block's terminator.
+ // We have to scan the use list because PHI nodes use basic blocks too but
+ // do not require a label to be generated.
+ //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::getVoidTy(BB->getContext()) &&
+ !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ writeInstComputationInline(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator.
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+  // Don't output a void return if this is the last basic block in the function
+  // and the block contains more than just this return instruction.
+  if (I.getNumOperands() == 0 &&
+      &*--I.getParent()->getParent()->end() == I.getParent() &&
+      I.getParent()->size() != 1) {
+    return;
+  }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Out << " switch (";
+ writeOperand(SI.getOperand(0));
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+ Out << " case ";
+ writeOperand(SI.getOperand(i));
+ Out << ":\n";
+ BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+ Out << " }\n";
+}
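+
+// Illustrative shape of the output (labels and the case value are
+// placeholders):
+//   switch (llvm_cbe_x) {
+//   default:
+//     goto <default-dest>;
+// ;
+//   case <case-value>:
+//     goto <case-dest>;
+//     break;   /* printed only when <case-dest> is the textually next block */
+//   }
+// PHI temporary copies, when present, are printed just before each goto.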
+
+void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
+ Out << " goto *(void*)(";
+ writeOperand(IBI.getOperand(0));
+ Out << ");\n";
+}
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+  /// FIXME: This should be re-enabled, but only once it is made safe with
+  /// respect to loop reordering.
+  return true;
+
+ if (llvm::next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
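+
+// Illustrative shape for a conditional branch where both targets need gotos
+// (PHI temporary copies, if any, appear just before each goto):
+//   if (llvm_cbe_cond) {
+//     goto <true-dest>;
+//   } else {
+//     goto <false-dest>;
+//   }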
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+ // binary instructions, shift instructions, setCond instructions.
+ assert(!I.getType()->isPointerTy());
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+  if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
+      (I.getType() == Type::getInt16Ty(I.getContext())) ||
+      (I.getType() == Type::getFloatTy(I.getContext()))) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (BinaryOperator::isFNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::getFloatTy(I.getContext()))
+ Out << "fmodf(";
+ else if (I.getType() == Type::getDoubleTy(I.getContext()))
+ Out << "fmod(";
+ else // all 3 flavors of long double
+ Out << "fmodl(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+    // Certain instructions require the operand to be forced to a specific
+    // type, so we use writeOperandWithCast here instead of writeOperand.
+    // Similarly below for operand 1.
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: Out << " + "; break;
+ case Instruction::Sub:
+ case Instruction::FSub: Out << " - "; break;
+ case Instruction::Mul:
+ case Instruction::FMul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid operator type!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
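+
+// A sketch of the result for an i8 add (operand names illustrative): the
+// needsCast logic above casts the promoted result back to i8 and
+// writeOperandWithCast casts each operand to unsigned, giving roughly
+//   ((unsigned char)(((unsigned char)llvm_cbe_a) + ((unsigned char)llvm_cbe_b)))
+// possibly with one more wrapping cast supplied by writeInstructionCast.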
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // We must cast the results of icmp which might be promoted.
+ bool needsCast = false;
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+  // Certain icmp predicates require the operand to be forced to a specific
+  // type, so we use writeOperandWithCast here instead of writeOperand.
+  // Similarly below for operand 1.
+ writeOperandWithCast(I.getOperand(0), I);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default:
+#ifndef NDEBUG
+ errs() << "Invalid icmp predicate!" << I;
+#endif
+ llvm_unreachable(0);
+ }
+
+ writeOperandWithCast(I.getOperand(1), I);
+ if (NeedsClosingParens)
+ Out << "))";
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
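+
+// For example, an ordered "olt" comparison of two doubles becomes a call to
+// one of the helpers emitted in doInitialization above:
+//   llvm_fcmp_olt(llvm_cbe_x, llvm_cbe_y)
+// (operand names illustrative); the helper reduces to "X < Y", with the
+// unordered predicates handled by the llvm_fcmp_u* variants.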
+
+static const char * getFloatBitCastField(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: llvm_unreachable("Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ const Type *DstTy = I.getType();
+ const Type *SrcTy = I.getOperand(0)->getType();
+ if (isFPIntBitCast(I)) {
+ Out << '(';
+ // These int<->float and long<->double casts need to be handled specially
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ Out << ')';
+ return;
+ }
+
+ Out << '(';
+ printCast(I.getOpcode(), SrcTy, DstTy);
+
+ // Make a sext from i1 work by subtracting the i1 from 0 (an int).
+ if (SrcTy == Type::getInt1Ty(I.getContext()) &&
+ I.getOpcode() == Instruction::SExt)
+ Out << "0-";
+
+ writeOperand(I.getOperand(0));
+
+ if (DstTy == Type::getInt1Ty(I.getContext()) &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ Out << ')';
+}
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+// printLimitValue - Print the limits.h macro name (or literal value) for the
+// maximum or minimum of the given integer type.
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+ raw_ostream &Out) {
+ const char* type;
+ const char* sprefix = "";
+
+ unsigned NumBits = Ty.getBitWidth();
+ if (NumBits <= 8) {
+ type = "CHAR";
+ sprefix = "S";
+ } else if (NumBits <= 16) {
+ type = "SHRT";
+ } else if (NumBits <= 32) {
+ type = "INT";
+ } else if (NumBits <= 64) {
+ type = "LLONG";
+ } else {
+ llvm_unreachable("Bit widths > 64 not implemented yet");
+ }
+
+  if (isSigned)
+    Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else if (isMax)
+    Out << "U" << type << "_MAX";
+  else
+    Out << "0"; // The minimum of an unsigned type is simply 0.
+}
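+
+// For example, a signed i32 prints as INT_MAX or INT_MIN and a signed i8 as
+// SCHAR_MAX or SCHAR_MIN; these macros feed the overflow checks generated in
+// printIntrinsicDefinition below.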
+
+#ifndef NDEBUG
+static bool isSupportedIntegerSize(const IntegerType &T) {
+ return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+ T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+#endif
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+ const FunctionType *funT = F.getFunctionType();
+ const Type *retT = F.getReturnType();
+ const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+ assert(isSupportedIntegerSize(*elemT) &&
+ "CBackend does not support arbitrary size integers.");
+ assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+ elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+ switch (F.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported Intrinsic.");
+ case Intrinsic::uadd_with_overflow:
+ // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+ // Rty r;
+ // r.field0 = a + b;
+ // r.field1 = (r.field0 < a);
+ // return r;
+ // }
+ Out << "static inline ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, false);
+ Out << "a,";
+ printSimpleType(Out, elemT, false);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field0 = a + b;\n";
+ Out << " r.field1 = (r.field0 < a);\n";
+ Out << " return r;\n}\n";
+ break;
+
+ case Intrinsic::sadd_with_overflow:
+ // static inline Rty sadd_ixx(ixx a, ixx b) {
+ // Rty r;
+ // r.field1 = (b > 0 && a > XX_MAX - b) ||
+ // (b < 0 && a < XX_MIN - b);
+ // r.field0 = r.field1 ? 0 : a + b;
+ // return r;
+ // }
+ Out << "static ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, true);
+ Out << "a,";
+ printSimpleType(Out, elemT, true);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field1 = (b > 0 && a > ";
+ printLimitValue(*elemT, true, true, Out);
+ Out << " - b) || (b < 0 && a < ";
+ printLimitValue(*elemT, true, false, Out);
+ Out << " - b);\n";
+ Out << " r.field0 = r.field1 ? 0 : a + b;\n";
+ Out << " return r;\n}\n";
+ break;
+ }
+}
+
+void CWriter::lowerIntrinsics(Function &F) {
+  // This is used to keep track of the functions that intrinsic calls get
+  // lowered to.  We must emit their prototypes before the function body, since
+  // the lowering is only expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::memory_barrier:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::powi:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ case Intrinsic::ppc_altivec_lvsl:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+void CWriter::visitCallInst(CallInst &I) {
+ if (isa<InlineAsm>(I.getCalledValue()))
+ return visitInlineAsm(I);
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (visitBuiltinCall(I, ID, WroteCallee))
+ return;
+
+ Value *Callee = I.getCalledValue();
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ const AttrListPtr &PAL = I.getAttributes();
+ bool hasByVal = I.hasByValArgument();
+ bool isStructRet = I.hasStructRetAttr();
+ if (isStructRet) {
+ writeOperandDeref(I.getArgOperand(0));
+ Out << " = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer. Ditto for indirect calls with byval arguments.
+ bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+    // Note finally that this is completely unsafe.  ANSI C does not guarantee
+    // that void* and function pointers have the same size. :( To deal with
+    // this in the common case, we only handle casts where the number of
+    // arguments passed matches exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (isStructRet)
+ printStructReturnPointerFunctionType(Out, PAL,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ else if (hasByVal)
+ printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+ else
+ printType(Out, I.getCalledValue()->getType());
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
+
+ Out << '(';
+
+ bool PrintedArg = false;
+  if (FTy->isVarArg() && !FTy->getNumParams()) {
+ Out << "0 /*dummy arg*/";
+ PrintedArg = true;
+ }
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+
+ for (; AI != AE; ++AI, ++ArgNo) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+ Out << ')';
+ }
+ // Check if the argument is expected to be passed by value.
+ if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ writeOperandDeref(*AI);
+ else
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled, return false if it wasn't handled, and
+/// optionally set 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+ bool &WroteCallee) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+ Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ return false;
+ }
+ case Intrinsic::memory_barrier:
+ Out << "__sync_synchronize()";
+ return true;
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ // Output the last argument to the enclosing function.
+ if (I.getParent()->getParent()->arg_empty())
+ Out << "vararg_dummy_arg";
+ else
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return true;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return true;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", *(va_list*)";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::powi:
+ Out << "__builtin_powi(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ')';
+ return true;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ", ";
+ writeOperand(I.getArgOperand(2));
+ Out << ")";
+ return true;
+ case Intrinsic::stacksave:
+ // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+ // to work around GCC bugs (see PR1809).
+ Out << "0; *((void**)&" << GetValueName(&I)
+ << ") = __builtin_stack_save()";
+ return true;
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ // Multiple GCC builtins multiplex onto this intrinsic.
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
+ default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
+ case 0: Out << "__builtin_ia32_cmpeq"; break;
+ case 1: Out << "__builtin_ia32_cmplt"; break;
+ case 2: Out << "__builtin_ia32_cmple"; break;
+ case 3: Out << "__builtin_ia32_cmpunord"; break;
+ case 4: Out << "__builtin_ia32_cmpneq"; break;
+ case 5: Out << "__builtin_ia32_cmpnlt"; break;
+ case 6: Out << "__builtin_ia32_cmpnle"; break;
+ case 7: Out << "__builtin_ia32_cmpord"; break;
+ }
+ if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+ Out << 'p';
+ else
+ Out << 's';
+ if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+ Out << 's';
+ else
+ Out << 'd';
+
+ Out << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ case Intrinsic::ppc_altivec_lvsl:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ Out << "__builtin_altivec_lvsl(0, (void*)";
+ writeOperand(I.getArgOperand(0));
+ Out << ")";
+ return true;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ Out << GetValueName(I.getCalledFunction()) << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
+ }
+}
+
+// This converts the llvm constraint string to something gcc is expecting.
+// TODO: work out platform-independent constraints and factor those out of the
+//       per-target tables; handle multiple constraint codes.
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ // Grab the translation table from MCAsmInfo if it exists.
+ const MCAsmInfo *TargetAsm;
+ std::string Triple = TheModule->getTargetTriple();
+ if (Triple.empty())
+ Triple = llvm::sys::getHostTriple();
+
+ std::string E;
+ if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+ TargetAsm = Match->createMCAsmInfo(Triple);
+ else
+ return c.Codes[0];
+
+ const char *const *table = TargetAsm->getAsmCBE();
+
+ // Search the translation table if it exists.
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i]) {
+ delete TargetAsm;
+ return table[i+1];
+ }
+
+ // Default is identity.
+ delete TargetAsm;
+ return c.Codes[0];
+}
+
+//TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+    else if (asmstr[i] == '%') { // Escape a literal '%' for the GCC asm string.
+      asmstr.replace(i, 1, "%%");
+      ++i;
+    }
+
+ return asmstr;
+}
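+
+// For illustration, the rewriting above turns an LLVM asm string such as
+//   "mov $1, $0\n"  into  "mov %1, %0\n"   (the newline becomes a literal \n),
+// turns an operand modifier such as "${0:x}" into "%x0", and doubles literal
+// '%' characters to "%%".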
+
+// TODO: the assumptions about which call operands are consumed as inline asm
+//       arguments are likely wrong; also handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
+ InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
+
+ std::vector<std::pair<Value*, int> > ResultVals;
+ if (CI.getType() == Type::getVoidTy(CI.getContext()))
+ ;
+ else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ResultVals.push_back(std::make_pair(&CI, (int)i));
+ } else {
+ ResultVals.push_back(std::make_pair(&CI, -1));
+ }
+
+ // Fix up the asm string for gcc and emit it.
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
+ Out << " :";
+
+ unsigned ValueCount = 0;
+ bool IsFirst = true;
+
+ // Convert over all the output constraints.
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+
+ if (I->Type != InlineAsm::isOutput) {
+ ++ValueCount;
+ continue; // Ignore non-output constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ // Unpack the dest.
+ Value *DestVal;
+ int DestValNo = -1;
+
+ if (ValueCount < ResultVals.size()) {
+ DestVal = ResultVals[ValueCount].first;
+ DestValNo = ResultVals[ValueCount].second;
+ } else
+ DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ if (I->isEarlyClobber)
+ C = "&"+C;
+
+ Out << "\"=" << C << "\"(" << GetValueName(DestVal);
+ if (DestValNo != -1)
+ Out << ".field" << DestValNo; // Multiple retvals.
+ Out << ")";
+ ++ValueCount;
+ }
+
+
+ // Convert over all the input constraints.
+ Out << "\n :";
+ IsFirst = true;
+ ValueCount = 0;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isInput) {
+ ++ValueCount;
+ continue; // Ignore non-input constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
+ Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
+
+ Out << "\"" << C << "\"(";
+ if (!I->isIndirect)
+ writeOperand(SrcVal);
+ else
+ writeOperandDeref(SrcVal);
+ Out << ")";
+ }
+
+  // Convert over the clobber constraints.
+  Out << "\n        :";
+  IsFirst = true;
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+    if (I->Type != InlineAsm::isClobber)
+      continue; // Ignore non-clobber constraints.
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ Out << '\"' << C << '"';
+ }
+
+ Out << ")";
+}
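+// For reference, a sketch of what this routine emits (value names are
+// whatever GetValueName() produces; the "llvm_cbe_*" spellings below are
+// assumed). A call such as
+//
+//     %x = call i32 asm "bswap $0", "=r,r"(i32 %y)
+//
+// comes out roughly as:
+//
+//   __asm__ volatile ("bswap %0"
+//        :"=r"(llvm_cbe_x)
+//        :"r"(llvm_cbe_y))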
+
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
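+// Illustrative output (the exact type spelling depends on printType and the
+// names are assumed): "%p = alloca i32, i32 %n" becomes roughly
+//
+//   (unsigned int*) alloca(sizeof(unsigned int) * llvm_cbe_n)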
+
+void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static) {
+
+ // If there are no indices, just print out the pointer.
+ if (I == E) {
+ writeOperand(Ptr);
+ return;
+ }
+
+ // Find out if the last index is into a vector. If so, we have to print this
+ // specially. Since vectors can't have elements of indexable type, only the
+ // last index could possibly be of a vector element.
+ const VectorType *LastIndexIsVector = 0;
+ {
+ for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+ LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+ }
+
+ Out << "(";
+
+ // If the last index is into a vector, we can't print it as &a[i][j] because
+ // we can't index into a vector with j in GCC. Instead, emit this as
+ // (((float*)&a[i])+j)
+ if (LastIndexIsVector) {
+ Out << "((";
+ printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+ Out << ")(";
+ }
+
+ Out << '&';
+
+ // If the first index is 0 (very typical) we can do a number of
+ // simplifications to clean up the code.
+ Value *FirstOp = I.getOperand();
+ if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+ // First index isn't simple, print it the hard way.
+ writeOperand(Ptr);
+ } else {
+ ++I; // Skip the zero index.
+
+ // Okay, emit the first operand. If Ptr is something that is already address
+ // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
+ if (isAddressExposed(Ptr)) {
+ writeOperandInternal(Ptr, Static);
+ } else if (I != E && (*I)->isStructTy()) {
+ // If we didn't already emit the first operand, see if we can print it as
+ // P->f instead of "P[0].f"
+ writeOperand(Ptr);
+ Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ ++I; // eat the struct index as well.
+ } else {
+ // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+ Out << "(*";
+ writeOperand(Ptr);
+ Out << ")";
+ }
+ }
+
+ for (; I != E; ++I) {
+ if ((*I)->isStructTy()) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else if ((*I)->isArrayTy()) {
+ Out << ".array[";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else if (!(*I)->isVectorTy()) {
+ Out << '[';
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else {
+ // If the last index is into a vector, then print it out as "+j)". This
+ // works with the 'LastIndexIsVector' code above.
+ if (isa<Constant>(I.getOperand()) &&
+ cast<Constant>(I.getOperand())->isNullValue()) {
+ Out << "))"; // avoid "+0".
+ } else {
+ Out << ")+(";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << "))";
+ }
+ }
+ }
+ Out << ")";
+}
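+// Two illustrative expansions of the logic above (value names are assumed
+// GetValueName-style placeholders; any casts writeOperandWithCast adds to the
+// index operands are omitted):
+//
+//   getelementptr %struct.S* %P, i32 0, i32 1   ==>  (&llvm_cbe_P->field1)
+//   getelementptr %struct.S* %P, i32 %i, i32 1  ==>  (&llvm_cbe_P[llvm_cbe_i].field1)
+//
+// i.e. a leading zero index is folded away, and struct members are always
+// addressed through the synthetic "fieldN" names this backend assigns.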
+
+void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment) {
+
+ bool IsUnaligned = Alignment &&
+ Alignment < TD->getABITypeAlignment(OperandType);
+
+ if (!IsUnaligned)
+ Out << '*';
+ if (IsVolatile || IsUnaligned) {
+ Out << "((";
+ if (IsUnaligned)
+ Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+ printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+ if (IsUnaligned) {
+ Out << "; } ";
+ if (IsVolatile) Out << "volatile ";
+ Out << "*";
+ }
+ Out << ")";
+ }
+
+ writeOperand(Operand);
+
+ if (IsVolatile || IsUnaligned) {
+ Out << ')';
+ if (IsUnaligned)
+ Out << "->data";
+ }
+}
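+// Sketch of the three shapes this produces (type spellings and value names
+// are illustrative):
+//
+//   naturally aligned:        *llvm_cbe_p
+//   volatile:                 *((unsigned int volatile*)llvm_cbe_p)
+//   under-aligned (align 2):  ((struct __attribute__ ((packed, aligned(2)))
+//                               {unsigned int data; } *)llvm_cbe_p)->data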
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+ I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+ I.isVolatile(), I.getAlignment());
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask, false);
+ Out << ")";
+ }
+}
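+// Example of the masking path above (names illustrative): storing an i17
+// value is emitted roughly as
+//
+//   *llvm_cbe_p = ((llvm_cbe_v) & 0x1ffff)   // mask spelled however
+//                                            // printConstant renders it
+//
+// so that bits beyond the declared width never reach memory.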
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+ const Type *EltTy = I.getType()->getElementType();
+ writeOperand(I.getOperand(0));
+ Out << ";\n ";
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(&I) << "))[";
+ writeOperand(I.getOperand(2));
+ Out << "] = (";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+ // We know that our operand is not inlined.
+ Out << "((";
+ const Type *EltTy =
+ cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+ writeOperand(I.getOperand(1));
+ Out << "]";
+}
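+// Both element accessors above go through an element-typed pointer cast;
+// e.g. extracting element 1 of a <4 x float> %v prints roughly as (the
+// value name is assumed):
+//
+//   ((float*)(&llvm_cbe_v))[1]
+//
+// and visitInsertElementInst first copies the source vector into the result
+// variable, then assigns one element through the same kind of cast.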
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Out << "(";
+ printType(Out, SVI.getType());
+ Out << "){ ";
+ const VectorType *VT = SVI.getType();
+ unsigned NumElts = VT->getNumElements();
+ const Type *EltTy = VT->getElementType();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i) Out << ", ";
+ int SrcVal = SVI.getMaskValue(i);
+ if ((unsigned)SrcVal >= NumElts*2) {
+ Out << " 0/*undef*/ ";
+ } else {
+ Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+ if (isa<Instruction>(Op)) {
+ // Do an extractelement of this value from the appropriate input.
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(Op)
+ << "))[" << (SrcVal & (NumElts-1)) << "]";
+ } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+ Out << "0";
+ } else {
+ printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+ (NumElts-1)),
+ false);
+ }
+ }
+ }
+ Out << "}";
+}
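+// The result is a compound literal of the shuffle's vector type: each mask
+// element either indexes one of the two sources through an element-typed
+// pointer cast, exactly as in visitExtractElementInst above, e.g.
+// "((float*)(&llvm_cbe_a))[2]" (names illustrative), or collapses to 0 for
+// undef and zero inputs.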
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+ // Start by copying the entire aggregate value into the result variable.
+ writeOperand(IVI.getOperand(0));
+ Out << ";\n ";
+
+ // Then do the insert to update the field.
+ Out << GetValueName(&IVI);
+ for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ Out << " = ";
+ writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+ Out << "(";
+ if (isa<UndefValue>(EVI.getOperand(0))) {
+ Out << "(";
+ printType(Out, EVI.getType());
+ Out << ") 0/*UNDEF*/";
+ } else {
+ Out << GetValueName(EVI.getOperand(0));
+ for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
+ if (IndexedTy->isArrayTy())
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ }
+ Out << ")";
+}
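+// Aggregate accesses reuse the synthetic ".fieldN" / ".array[N]" members this
+// backend gives struct and array types; e.g. extracting index 1 of a
+// two-field struct %s prints roughly as (the value name is assumed):
+//
+//   (llvm_cbe_s.field1)
+//
+// while visitInsertValueInst above first copies the whole aggregate and then
+// assigns just the selected member.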
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CWriter(o));
+ PM.add(createGCInfoDeleter());
+ return false;
+}
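+// Usage note (an illustration, not part of the pass setup above): this
+// backend is reached through the normal llc driver, e.g.
+//
+//   llc -march=c input.bc -o output.c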
diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 000000000000..e64216be0bdc
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,41 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ CTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
new file mode 100644
index 000000000000..f7e8ff254848
--- /dev/null
+++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCBackendTarget;
+
+extern "C" void LLVMInitializeCBackendTargetInfo() {
+ RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 000000000000..9468aee067a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,449 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// as an intrinsic for the time being
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+    RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+           "orc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+          (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..85fb258eac2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMCellSPUDesc
+ SPUMCTargetDesc.cpp
+ SPUMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..10d9a42239ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/CellSPU/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..8c1176a9d028
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCAsmInfo.h"
+using namespace llvm;
+
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignmentIsInBytes = false;
+
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+
+ // Has leb128
+ HasLEB128 = true;
+
+ SupportsDebugInformation = true;
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ ExceptionsType = ExceptionHandling::None;
+
+ // SPU assembly requires ".section" before ".bss"
+ UsesELFSectionDirectiveForBSS = true;
+}
+
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
new file mode 100644
index 000000000000..7f850d347f56
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -0,0 +1,28 @@
+//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..26c5a4bc7b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPUMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
+ RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 000000000000..c5c037d4de44
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- SPUMCTargetDesc.h - Cell SPU Target Descriptions ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h
new file mode 100644
index 000000000000..b51fbc7a5197
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,31 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "MCTargetDesc/SPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
+
+}
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.td b/contrib/llvm/lib/Target/CellSPU/SPU.td
new file mode 100644
index 000000000000..8327fe03d7f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+    SubtargetFeature<"large_mem","UseLargeMem", "true",
+                     "Use large (>256KB) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 000000000000..3031fda54381
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 000000000000..f340edfb0f86
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,408 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector versus scalar differ by a
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
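+// As a reading aid for the fragments below, the scalar i64 seteq sequence
+// boils down to (a sketch of the SPU instructions involved, not emitted
+// text):
+//
+//     ceq    t, a, b      ; four 32-bit equality results, one per slot
+//     gb     t, t         ; gather the four slot results into one nibble
+//     cgti   t, t, 0xb    ; "> 0b1011" holds only when both high slots match
+//
+// which is what CEQr64compare encodes via CEQv4i32/GBv4i32/CGTIv4i32; the
+// vector form instead requires all four slots to match (CEQIv4i32 ..., 0xf).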
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+ CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
+ CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64LGEv2i64.Fragment>;
+
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+ CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+                        (XSWDv2i64 CGTv2i64sgt.Fragment),
+ CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64GEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 000000000000..fd96694b32fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,334 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SPUAsmPrinter : public AsmPrinter {
+ public:
+ explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
+ AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+
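+ /// EmitInstruction - Format MI with the tablegen'd printInstruction and
+ /// hand the resulting text to the streamer.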
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printOp(const MachineOperand &MO, raw_ostream &OS);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO, O);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value < (1 << 7) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Print a register-plus-register memory reference as "rA, rB".
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << getRegisterName(MO.getReg()) << ", ";
+ printOperand(MI, OpNo+1, O);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
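+ // Sign-extend the low 16 bits of the immediate before range checking.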
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
+ assert(MI->getOperand(OpNo).isImm() &&
+ "Invalid/non-immediate rotate amount in printROTNeg7Imm");
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ }
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ report_fatal_error("printOp() does not handle immediate values");
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
+ << "$non_lazy_ptr";
+ return;
+ }
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ const GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ return;
+ }
+ }
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+ case MachineOperand::MO_MCSymbol:
+ O << *(MO.getMCSymbol());
+ return;
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo, O);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() {
+ RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 000000000000..04fa2ae866d6
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,57 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: return values are passed in registers R3-R74.
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 000000000000..a3e7e73ae30a
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,275 @@
+//===-- SPUFrameLowering.cpp - Cell SPU frame lowering implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame lowering (prologue/epilogue insertion) for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
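+ // The only recorded spill slot: the link register ($lr, R0) at offset 16
+ // from the incoming $sp.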
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function has a non-zero stack size and
+// either frame-pointer elimination is disabled or it uses variable-sized stack
+// objects.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
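+ // (e.g. a 72-byte frame rounds up to 80 under a 16-byte alignment mask).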
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
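+ // Reserve the LR/SP spill area (minStackSize) on top of the locals and
+ // negate the total, since the stack grows down and $sp is adjusted by
+ // adding a negative amount.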
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint iff the basic block
+ // contains a sufficient number of instructions. Note that this is just a
+ // best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
+ .addSym(MMI.getContext().CreateTempSymbol());
+ }
+ }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
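+ // Mirror the prologue: the amount to pop is the frame size plus the LR/SP
+ // spill area.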
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, then adjust $sp by the required amount.
+ // Note: reloading first creates a small dual-issue opportunity.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
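+ // Reserve an emergency spill slot (an R32C-sized stack object) for the
+ // register scavenger.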
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 000000000000..4fee72d946a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,94 @@
+//===-- SPUFrameLowering.h - SPU Frame Lowering definitions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU implementation of TargetFrameLowering: frame
+// layout, prologue/epilogue emission, and related frame constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ //! Predicate: Target has dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, the only callee-saved spill slot is the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
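+ (That is 511 * 16 = 8176 bytes.)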
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ //! Number of instructions required to overcome hint-for-branch latency
+ /*!
+ HBR (hint-for-branch) instructions can be inserted when, for example, the
+ control flow graph tells us that a given function (such as printf()) is
+ about to be called. HBRs only pay off if a sufficient number of
+ instructions occurs between the HBR and the branch target. An HBR
+ currently takes 6 cycles, hence the magic number 6.
+ */
+ static int branchHintPenalty() {
+ return 6;
+ }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 000000000000..403d7ef1fd9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,141 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+ << retval << "\n");
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 000000000000..675632cc7f13
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 000000000000..a297d036f03e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1203 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameLowering.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>((short) CN->getZExtValue());
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ EVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+
+ return false;
+ }
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ EVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
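+ // Selector bytes 0x04..0x07 and 0x0c..0x0f copy words 1 and 3 of the
+ // source into words 0 and 2; 0x80 control bytes make SHUFB emit constant
+ // 0x00 bytes.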
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
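+ // Same layout as the carry mask, except the 0xc0 control bytes make SHUFB
+ // emit constant 0xff bytes instead of zeros.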
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ const SPUTargetMachine &TM;
+ const SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDNode *bvNode) {
+ EVT vecVT = bvNode->getValueType(0);
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
+ ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ const Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
+ SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+#endif
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ private:
+ SDValue getRC( MVT );
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ EVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+ int64_t val;
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
+ Base = CurDAG->getTargetConstant( val , MVT::i32);
+ Index = Zero;
+ return true; break;
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ "not wrapped as A-form address.");
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+ // Just load from memory if there's only a single use of the location,
+ // otherwise, this will get handled below with D-form offset addresses
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ const GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+ Examine whether the input address can be expressed as a base register plus
+ a signed 10-bit displacement, [r+I10] (a D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ EVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // The frame index, scaled to a byte offset (FI * 16), must fit the D-form range:
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register
+ || Opc == ISD::CopyFromReg
+ || Opc == ISD::UNDEF
+ || Opc == ISD::Constant) {
+ unsigned OpOpc = Op->getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Offs;
+
+ Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = N;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+ // If the address is neither A-form nor D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/*!
+ Utility function for use with COPY_TO_REGCLASS instructions. Returns an
+ SDValue to be used as the last parameter of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ break;
+ default:
+ assert( false && "add a new case here" );
+ }
+ return SDValue();
+}
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc = 0;
+ EVT OpVT = N->getValueType(0);
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
+
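+ // Small frame indices become (AIr32 frameindex, 0), leaving the real offset
+ // to frame-index elimination; larger ones materialize the frame address
+ // with ILAr32 and add it to $sp.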
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
+ N->getValueType(0), TFI),
+ 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(N, OpVT, N->getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = N->getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ Op0VT, (128 / Op0VT.getSizeInBits()));
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
+ /*NOTREACHED*/
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ PromScalar, PromScalar,
+ SDValue(shufMaskLoad, 0));
+
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = N->getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+ // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64)
+ return SelectSHLi64(N, OpVT);
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64)
+ return SelectSRLi64(N, OpVT);
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64)
+ return SelectSRAi64(N, OpVT);
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = N->getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst).getNode());
+ }
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec.getNode());
+ return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ EVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
+ return Result;
+
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ Ops[1] = Op1;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ if (isInt<10>(CN->getSExtValue())) {
+ NewOpc = SPU::AIr32;
+ Ops[1] = Op1;
+ } else {
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
+ Op1),
+ 0);
+ }
+ }
+ Ops[0] = Op0;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(N);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param N The shl operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
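+//
+// For illustration, a constant shift amount of 35 yields bytes = 35 >> 3 = 4
+// and bits = 35 & 7 = 3, so the zero-filled quadword below is shifted with
+// SHLQBYIv2i64 by 4 bytes and then SHLQBIIv2i64 by 3 bits.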
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param N The srl operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param N The sra operand
+ * @param OpVT N's machine value type (doesn't need to be passed, but
+ *        makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
+ // Promote Op0 to vector
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
+ DebugLoc dl) {
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BITCAST) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs.getNode()));
+
+ if (rhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs.getNode()));
+
+ if (shufmask.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask.getNode()));
+
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
+ } else {
+ report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
+ "condition");
+ }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 000000000000..f0ceee214149
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,3257 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameLowering.h"
+#include "SPUMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+ std::map<unsigned, const char *> node_names;
+
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+  // Custom lower i8 multiplications; i32 and i64 multiplications are legal
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+  // SPU does not have BSWAP, but it does support CTLZ for i32.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+    // mul is legal for the supported vector types as well.
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ setMinFunctionAlignment(3);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ if (node_names.empty()) {
+ node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+ node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+ node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+ node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+ node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+ node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+ node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+ node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+ node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
+ node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+ node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+ node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+ }
+
+ std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+ return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32,ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
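+//
+// For example, an i32 located 8 bytes into its 16-byte line is loaded by
+// rotating the containing quadword left by 8 bytes so that the value lands
+// in the preferred slot (bytes 0..3).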
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ int pso = prefslotOffset(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+  assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+ // clean aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
+
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+  // When the size is no greater than the alignment, we get all of the data
+  // with just one load
+ if (alignment >= InVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+  // When the alignment is less than the size, we might need two loads
+  // (known only at run-time).
+  // TODO: if the memory address is composed only of constants, we have
+  // extra knowledge and might avoid the second load
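+  //
+  // For illustration: an i64 whose base pointer is 12 bytes into its 16-byte
+  // line takes bytes 12..15 from the low quadword and bytes 0..3 from the
+  // high quadword; the byte shifts below line both halves up before they are
+  // OR'ed together.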
+ else {
+ // storage position offset from lower 16 byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // Get a register full of ones (this implementation is a workaround: LLVM
+    // cannot handle 128-bit signed int constants).
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+    // Shift the (possible) high part right to compensate for the
+    // misalignment. If there is no high part (i.e. the value is i64 and the
+    // offset is 4), this will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+ // Shift the low similarly
+ // TODO: add SPUISD::SHL_BYTES
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+
+ }
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
+ */
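+//
+// Roughly, the aligned case below loads the containing quadword, builds a
+// shuffle mask from the insertion offset (SHUFFLE_MASK), promotes the scalar
+// value to a vector (SCALAR_TO_VECTOR), merges it into the loaded data with
+// SHUFB, and then stores the whole quadword back.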
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+ // two sanity checks
+  assert( SN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // clean aligned stores can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+ // if we don't need to store over the 16 byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(low);
+ SDValue theValue = SN->getValue();
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+  // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(),
+ 16);
+
+ }
+ // do the store when it might cross the 16 byte memory access boundary.
+ else {
+ // TODO issue a warning if SN->isVolatile()== true? This is likely not
+ // what the user wanted.
+
+    // address offset from the nearest lower 16-byte aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+    // get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128 bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+    // If the value to store doesn't fill an entire 128 bits, zero out the
+    // last bits of the mask so that only the value we want to store is
+    // covered by it. This happens e.g. for a store of i32 with align 2.
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+    // This will be zero if no data goes to the high quadword.
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+    // Merge the old data and the new data and store the results.
+    // The vectors need to be converted to integers here, as OR'ing
+    // floating-point vectors asserts.
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
+ }
+
+ return result;
+}
+
+//! Generate the address of a constant pool entry.
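+//
+// Roughly: when the subtarget is not using large memory, the address is
+// emitted as a single A-form absolute address; otherwise it is split into
+// Hi/Lo halves that are combined through an IndirectAddr node, as the code
+// below shows.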
+static SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
+ "not supported.");
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Custom lower double precision floating point constants
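+//
+// For example, the f64 constant 1.0 is lowered by splatting its bit pattern
+// 0x3FF0000000000000 into a v2i64 BUILD_VECTOR, bitcasting to v2f64 and
+// extracting the preferred slot.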
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
+
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
+
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type
+ // or we're forced to do vararg
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ InVals.push_back(ArgVal);
+ // Update the chain
+ Chain = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ unsigned NumArgRegs = 77;
+
+ // We will spill (79-3)+1 registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create the frame slot
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
+ false, false, 0);
+ Chain = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ return Chain;
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
+
+ // Handy pointer type
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+  // linkage area and the parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
+
+ // Insert a call sequence start
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+      // style calls; otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
+
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
+ // If the call has results, copy the values out of the ret val registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
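+/// getVecImm - If this BUILD_VECTOR node has a single (possibly repeated)
+/// non-undef operand and that operand is a ConstantSDNode, return it;
+/// otherwise return 0.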
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
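+    // For i64 splats, the two 32-bit halves must be identical; the value is
+    // then reduced to that common 32-bit half before the range check.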
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isInt<10>(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
+/// and the value's low 16 bits are zero (an ILHU-style immediate), and if so,
+/// return the upper halfword as the constant
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ break;
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ break;
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/*!
+ Lower a splatted v2i64 constant: build the splat directly when the upper and
+ lower 32-bit halves are equal, otherwise synthesize it via a SHUFB mask or a
+ constant pool load.
+ */
+SDValue
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+    // Magic constant that can be matched by IL, ILA, et al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
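+    // Build the 16-byte SHUFB control word one 32-bit word at a time: for a
+    // "special" half the control byte encodes one of SHUFB's constant
+    // patterns (0x80, 0xc0, or 0xe0), otherwise it indexes the corresponding
+    // byte of the HI32/LO32 operands.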
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
+/// generate the control word for byte/halfword/word insertion. This takes
+/// care of a single
+/// element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V1 to V2, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2EltOffset = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+  int rotamt = 0;
+ EVT maskVT; // which of the c?d instructions to use
+
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
+ } else
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
+
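+  // Walk the shuffle mask once, simultaneously checking whether it is
+  // monotonic with a single element taken from V2 (the C*D insertion case)
+  // and whether it is a pure byte rotation of V1.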
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ // TODO: optimize for the monotonic case when several consecutive
+        // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ } else {
+ rotate = false;
+ }
+      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+        // First time or after a "wrap around"
+        rotamt = SrcElt - i;
+        PrevElt = SrcElt;
+      } else {
+        // This isn't a rotation; it takes elements from vector 2
+        rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address;
+    // R1 ($sp) is used here only because it is guaranteed to have its low
+    // bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ if (rotamt < 0)
+      rotamt += MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ EVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
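+    // Build a 16-byte shuffle mask that moves the requested element's bytes
+    // into the preferred slot: bytes before the slot are zero-filled (0x80
+    // control byte) and the pattern is repeated across the rest of the vector.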
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ for (int i = 0; i < 16; ++i) {
+      // zero fill upper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ EVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ "vector type!");
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ "type");
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
+
+ // use 0 when the lane to insert to is 'undef'
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
+
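+  // SHUFB merges the scalar (broadcast via SCALAR_TO_VECTOR) into VecOp at
+  // the lane selected by the insertion mask generated above.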
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
+
+ assert(Op.getValueType() == MVT::i8);
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled i8 math operator");
+ /*NOTREACHED*/
+ break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
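+  // The splatted constant may be either operand and may be wrapped in a
+  // BITCAST; peel that off so the BUILD_VECTOR can be inspected directly.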
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal, they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number of ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+    // CNTB_result becomes the chain to which the virtual register
+    // CNTB_reg becomes associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
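+    // CNTB yields a per-byte population count; the shift/add rounds below
+    // (by 16 and then 8 bits) fold all four byte counts of the preferred
+    // word into the low byte, which is finally masked with 0xff.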
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ EVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+ EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ EVT IntVT(MVT::i64);
+
+  // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
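+  // For the ordering comparisons below, both doubles are bitcast to i64 and
+  // their sign-magnitude encodings are mapped onto two's complement
+  // (negative values become 0x8000000000000000 - bits), so that an ordinary
+  // integer compare orders the original values.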
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to branch and insert a new basic
+ block and branch on the condition. However, this code does not make that
+  assumption, given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ EVT VT = Op.getValueType();
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
+  // The type to extend to needs to be an i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+  // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ Op.getNode()->dump();
+#endif
+ llvm_unreachable(0);
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ EVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\nReplace: ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->isNullValue()) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(errs() << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(errs() << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ // FIXME: The supported constraint letters appear to have been copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // Leaving it as is until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ // GCC constraint letters (carried over from the RS6000/PPC backend)
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ EVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ const Type *Ty) const {
+ // SPU's addresses are 256K:
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
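+// For reference: with (1 << 18) == 262144 the check above accepts offsets
+// strictly between -262144 and 262143, i.e. -262143 through 262142.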
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
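+// For reference: isInt<10> accepts the signed 10-bit range -512..511; larger
+// comparison constants have to be materialized in a register first.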
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *) const {
+
+ // A-form: 18-bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+ // D-form: reg + 14-bit offset
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg + reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+ return true;
+
+ return false;
+}
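+// For reference, these shapes correspond to the load/store forms defined in
+// SPUInstrInfo.td: A-form (lqa/stqa, an 18-bit absolute local-store address
+// taken from a global), D-form (lqd/stqd, a base register plus a signed
+// 14-bit byte offset) and X-form (lqx/stqx, a base register plus an index
+// register).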
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 000000000000..d23f6cc48c71
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,186 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+ // Start the numbering where the built-in ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+ CNTB, ///< Count ones in bytes (per-byte population count)
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+ //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+ class SPUTargetMachine; // forward dec'l.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 000000000000..5e268f8767c2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference uses the frame index in place of a base register until it is
+/// resolved. A constant offset can be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
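+// Typical use, an illustrative sketch mirroring storeRegToStackSlot in
+// SPUInstrInfo.cpp (TII here stands for a SPUInstrInfo reference):
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SPU::STQDr32))
+//                       .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);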
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 000000000000..bdbe2552dcdd
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,320 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
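+// (The RR encoding above packs an 11-bit opcode plus three 7-bit register
+// fields into the 32-bit instruction word: 11 + 7 + 7 + 7 == 32.)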
+
+let RB = 0 in {
+ // RR Format, where RB is zeroed (don't care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+ // RR Format, where RA and RB are zeroed (don't care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+ : Instruction {
+ field bits<32> Inst;
+ bits<16> i16;
+ bits<9> RO;
+
+ let Namespace = "SPU";
+ let InOperandList = IOL;
+ let OutOperandList = (outs); //no output
+ let AsmString = asmstr;
+ let Itinerary = BranchHints;
+
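+ // The 9-bit relative offset RO is split across the encoding: its two high
+ // bits land in Inst{7-8} and its low seven bits in Inst{25-31}.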
+ let Inst{0-6} = opcode;
+ let Inst{7-8} = RO{8-7};
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RO{6-0};
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 000000000000..e67b10c7984d
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,451 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ // We support cross register class moves for our aliases, such as R3 in any
+ // reg class to any other reg class containing R3. This is required because
+ // we select, for example, bitconvert i64 -> f64 as a no-op, so our register
+ // types have no specific meaning.
+
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
+ */
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+// Search MBB for branch hint labels and branch hint ops.
+static void removeHBR(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (I->getOpcode() == SPU::HBRA ||
+ I->getOpcode() == SPU::HBR_LABEL) {
+ // erase() returns the iterator following the removed instruction, so only
+ // advance the iterator when nothing was erased.
+ I = MBB.erase(I);
+ } else {
+ ++I;
+ }
+ }
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ removeHBR(MBB);
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(errs() << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(errs() << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ *  - the branch hint delays
+ *  - congestion of the memory bus
+ *  - dual-issue scheduling (i.e. avoid insertion of nops)
+ * The current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+ MachineBasicBlock::iterator J = MBB.end();
+ for (int i = 0; i < 8; i++) {
+ if (J == MBB.begin()) return J;
+ J--;
+ }
+ return J;
+}
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ MachineInstrBuilder MIB;
+ // TODO: use a more accurate placement algorithm.
+ bool haveHBR = MBB.size() > 8;
+
+ removeHBR(MBB);
+ MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+ // Add a label just before the branch
+ if (haveHBR)
+ MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MIB = BuildMI(&MBB, DL, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(errs() << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+
+ // Basic blocks have just one branch, so it is safe to add the hint here.
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+ } else {
+ // Conditional branch
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ if (haveHBR) {
+ MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+
+ DEBUG(errs() << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ if (haveHBR) {
+ MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(FBB);
+ }
+
+ DEBUG(errs() << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(errs() << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 000000000000..bc1ba71f7a45
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,84 @@
+//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SPURegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public SPUGenInstrInfo {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 000000000000..e103c9b6a5af
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4482 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+ def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
+ "$targ:\t${:comment}branch hint target",[ ]>;
+}
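+// ADJCALLSTACKDOWN/UP are the opcodes handed to the SPUGenInstrInfo
+// constructor in SPUInstrInfo.cpp as the call-frame setup/destroy pair, so
+// they bracket each call sequence until call frames are finalized
+// (presumably during prologue/epilogue insertion).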
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since the instruction selection
+// will try each of the instructions in sequence, i.e., the D-form first with
+// the 10-bit displacement, then the A-form with the 16-bit displacement, and
+// finally the X-form with register-register addressing.
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
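+ // Each defm above expands to one instruction per member suffix (LQDr32,
+ // LQDv16i8, and so on); these are the opcodes that isLoadFromStackSlot in
+ // SPUInstrInfo.cpp pattern-matches.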
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
+// the right constant")
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [(set R32C:$rT, imm18:$val)]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two-operand instructions, but they're encoded
+// as three operands with the first two arguments tied to each other.
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
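+
+// Illustrative note (not taken from this file; register numbers are
+// hypothetical): a full 32-bit constant such as 0x12345678 is typically
+// materialized as an ILHU/IOHL pair, e.g.
+//
+//   ilhu  $3, 0x1234      ; $3 = 0x12340000 (upper halfword, lower zeroed)
+//   iohl  $3, 0x5678      ; $3 = 0x12345678 (OR in the lower halfword)
+//
+// The anonymous Pat<(i32 imm:$imm), ...> mentioned above is what ties the
+// two instructions together during selection.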
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
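+
+// Illustrative note (assumed semantics; register number hypothetical): each of
+// the 16 immediate bits expands into one mask byte (1 -> 0xFF, 0 -> 0x00), so
+// "fsmbi $3, 0xFF00" produces a mask of eight 0xFF bytes and eight 0x00 bytes.
+// Such masks feed SELB (vide infra), which takes bits from $rB where the mask
+// is 1 and from $rA where it is 0.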
+
+// fsmb: Form select mask for bytes. N.B.: the input operand, $rA, is 16 bits wide
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
+// only 8 bits wide (even though it's input as 16 bits here)
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: add extended; doesn't match a pattern (both vector and register forms
+// are provided below).
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
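+
+// Rough sketch (an assumption, not the authoritative lowering): wide adds are
+// built from CG/ADDX pairs.  CG produces a per-word carry vector; once the
+// carries are shuffled into the slots of the next-higher words, ADDX adds
+// $rA, $rB and the carry tied in through $rCarry.  The exact 64-bit sequences
+// are defined elsewhere (presumably SPU64InstrInfo.td, referenced below).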
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: borrow generate; doesn't match a pattern (both vector and register forms
+// are provided below).
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+// Halfword multiply variants:
+// N.B.: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as the other multiplies; in addition, this doesn't match
+// a pattern, but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 32 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
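+// Illustrative note (assumed idiom, not taken from this file; t0-t3 are
+// hypothetical scratch registers): since mpy/mpyu only multiply 16-bit halves,
+// a full 32-bit multiply a*b is usually synthesized as
+//
+//   mpyh  t0, a, b        ; (a_hi * b_lo) << 16
+//   mpyh  t1, b, a        ; (b_hi * a_lo) << 16
+//   mpyu  t2, a, b        ; a_lo * b_lo
+//   a     t3, t0, t1
+//   a     rt, t3, t2      ; rt = a*b (mod 2^32)
+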
+// mpys: multiply high and shift right (returns the upper 16 bits of a
+// 16 x 16 multiply, sign extended to 32 bits.)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result from multiplying
+// the top 16 bits of $rA and $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high and add, unsigned
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
+// as well as slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [
+ /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+  // 32-bit form for XSBH: the first step when sign extending 8-bit quantities
+  // all the way to 32-bit quantities via a 32-bit register (see the sext 8->32
+ // pattern below). Intentionally doesn't match a pattern because we want the
+ // sext 8->32 pattern to do the work for us, namely because we need the extra
+ // XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
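+
+// Illustrative note (an assumption about how the patterns compose): an i8 ->
+// i32 sign extension is presumably selected as XSBH followed by XSHW, roughly
+//   (XSHWr32 (XSBHr32 R32C:$src))
+// which is why XSBHr32 above deliberately carries no pattern of its own.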
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [/*(set (out_vectype VECREG:$rDst),
+ (sext (out_vectype VECREG:$rSrc)))*/]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+ // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+ // quantities -- see 16->32 zext pattern.
+ //
+  // This pattern is somewhat artificial: it could match some compiler-generated
+  // code, but it is unlikely to do so in practice.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
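+
+// Illustrative note (an assumption about how the fabs forms are used): the
+// ANDfabs* instructions clear the IEEE sign bit, so an f32 fabs amounts to
+// ANDing the value with a 0x7fffffff mask (materialized separately, e.g. via
+// ILHU/IOHL); no pattern is attached, presumably because the mask setup is
+// done during custom lowering.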
+
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments in which we expect there to be a bit_convert on the constant.
+// Bear in mind that LLVM translates "not <reg>" to "xor <reg>, -1" (or, in
+// this case, a constant -1 vector.)
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+}
+
+defm OR : BitwiseOr;
+
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
+
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
+
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
+
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
+
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
+
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
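+
+// For example (register numbers hypothetical), "lr $3, $4" assembles to the
+// same bits as "or $3, $4, $4": RA fills both source fields in the encoding
+// above, so the alias costs nothing over a plain OR copy.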
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+
+  // i16i32: Hacked version of the ORI instruction to extend 16-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra "anyext 16->32" pattern.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+  // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra the "anyext" patterns.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
+// result in $rT[0]; slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
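+
+// Illustrative note (an assumption about how the fneg forms are used): the
+// XORfneg* instructions flip the IEEE sign bit, so an f32 negate is an XOR
+// with a 0x80000000 mask and an f64 negate an XOR with 0x8000000000000000;
+// as with the AND fabs forms, the masks are expected to come from custom
+// lowering code.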
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+  def v16i8_vcond: SELBVecVCondInst<v16i8>;
+  def v8i16_vcond: SELBVecVCondInst<v8i16>;
+  def v4i32_vcond: SELBVecVCondInst<v4i32>;
+  def v2i64_vcond: SELBVecVCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
+
+// EQV: Equivalence (1 for each same bit, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit SHUFB when LowerVECTOR_SHUFFLE generates a node with the
+// SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ ShuffleOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
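+
+// Illustrative note (assumed shufb semantics): each control byte in $rC picks
+// one byte of the result from the 32-byte concatenation of $rA and $rB
+// (0x00-0x0f select from $rA, 0x10-0x1f from $rB), while control bytes with
+// the high bit set produce constant 0x00, 0xFF or 0x80 bytes.  This is why a
+// single SHUFB plus the right control vector covers every SPUshuffle pattern
+// above.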
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of at most 7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
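+
+// Illustrative note (assumed composition, cf. the SHLQBI comments above): a
+// variable left shift of a quadword by n bits is built from both forms,
+// roughly shlqby by (n >> 3) bytes followed by shlqbi by (n & 7) bits;
+// SHLQBYBI below folds the byte portion of a bit count into one instruction.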
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit, or
+// 32-bit register.
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIGenInst<vectype, VECREG>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+ def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotShiftQuad, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
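+// (Re NOTE(2): the patterns below synthesize that negation with SFHIvec /
+// SFIr32 -- a subtract-from with an immediate of 0, which computes 0 - $rB.)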
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+// ROTHM r16 form: Rotate 16-bit quantity to the right, zero fill at the left.
+// Note: This instruction doesn't match a pattern because rB must be negated
+// for the instruction to work; hence the patterns below the instruction.
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTMI r32 form: here we know how to complement the immediate value, so the
+// instruction carries a pattern directly.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: This is a vector form merely so that when used in an
+// instruction pattern, type checking will succeed. This instruction assumes
+// that the user knows to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad right and mask by bytes from bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
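+// i.e. a full 128-bit logical right shift combines a bit-granular ROTQMBI
+// (for the residual bits) with a byte-granular ROTQMBYBI, both fed the
+// negated shift amount (SFIr32 $rB, 0 computes 0 - $rB).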
+
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate and mask algebraic (arithmetic shift right), halfword and word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
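+  // For example, with $rA = 0xffffffff and $rB = 1, the signed compare
+  // (-1 > 1) is false, while the unsigned compare (0xffffffff > 1) is true.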
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking); an example follows this comment block.
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
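+// For example, the defs below synthesize (setge a, b) as (or (cgt a, b),
+// (ceq a, b)) and (setlt a, b) as (nor (cgt a, b), (ceq a, b)); setle is a
+// cgt whose result is inverted with an xor against all ones.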
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
+
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+  // Branch relative and set link: Used if we actually know that the target
+  // is within [-32768, 32767] bytes of the call site
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: Used if we actually know that the target
+ // is an absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+  // Branch indirect and set link if external data. These instructions are not
+  // generated directly; they are matched via an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+  // Note: LLVM doesn't generate conditional indirect branches. Otherwise,
+  // these would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
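+// For other comparisons, the multiclasses below first materialize the
+// condition with ceq/cgt/clgt (or-ing in a ceq for the >=/<= cases) and then
+// branch on the nonzero (brnz/brhnz) or zero (brz/brhz) result.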
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
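+//
+// The SPU provides only fceq/fcmeq and fcgt/fcmgt for single precision; the
+// remaining predicates (ge, lt, le, ne) are synthesized further below from
+// these primitives with or/nor/xori-with-all-ones sequences.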
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// LLVM instruction space
+// How do these map onto Cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (both lines)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+//                 = -(d*b - a)*c + d
+//                 = (a - d*b)*c + d
+//                 = a*c - c*(a*b*c - a)   (substituting d = a*c)
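+// (The frest/fi/fm/fnms/fma division sequence sketched above is what the
+// fdiv patterns in SPUMathInstr.td, included at the end of this file,
+// actually emit.)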
+
+// fcopysign (???)
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+//         (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned int to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
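+// Note on the conversion patterns above: the trailing ", 0" in the csflt,
+// cuflt, cflts and cfltu assembly strings is the instructions' scale
+// immediate; these patterns always emit a scale of zero (a plain
+// int<->float conversion), which is what "Assume that scale = 0" refers to.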
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// TODO: include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: - (a * b - c)
+// - (a * b) + c => c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// - (a * b + c)
+// - (a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
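+// In other words, fneg flips the sign bit (the 0x80000000 mask comes from
+// "ilhu 0x8000"), and fabs clears it (the 0x7fffffff mask comes from
+// "ilhu 0x7fff" followed by "iohl 0xffff"), as the patterns above encode.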
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+def HBRA :
+ HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
+
+//===----------------------------------------------------------------------===//
+// Execution, Load NOP (execute NOPs belong in the even pipeline, load NOPs
+// belong in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
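+// For example, materializing 0x12345678 becomes "ilhu $rT, 0x1234" followed
+// by "iohl $rT, 0x5678": ilhu loads the upper halfword and zeroes the lower,
+// iohl then ORs in the lower halfword.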
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)), R64C)>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
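+// The shufb control words built below (ilhu/iohl immediates such as 0x0203,
+// 0x0607 or 0x0f0f) select the low-order bytes of the wider source value and
+// deposit them in the narrower type's preferred slot; only those bytes of
+// the shuffled result are meaningful.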
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
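+// Addresses that fit ila's 18-bit A-form immediate are loaded with a single
+// ILAlsa; everything else is split into high and low halves and rebuilt with
+// an ilhu/iohl pair, as the patterns below show.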
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 000000000000..3ef3ccbcaaee
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,49 @@
+//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false),
+ VarArgsFrameIndex(0)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() { return UsesLR; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 000000000000..ed7129e33291
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, and i8 types and the corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
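+// The SPU has no byte multiply, so the v16i8 product above is synthesized
+// from v8i16 halfword multiplies of shifted copies of the operands, merged
+// with SELB under FSMBI-generated byte masks.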
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
+ (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
+ (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
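+// The SPU multiplier is 16x16->32, so the full 32-bit product is assembled
+// from three partial products: mpyh($rA,$rB) = (aHi*bLo) << 16,
+// mpyh($rB,$rA) = (bHi*aLo) << 16, and mpyu($rA,$rB) = aLo*bLo. The
+// aHi*bHi term lies entirely outside the low 32 bits and is dropped.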
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
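+// Reading of the scalar sequence above: frest/fi produce an estimate
+// e ~= 1/b, q0 = a*e is the initial quotient, and the fnms/fma pair is one
+// Newton-Raphson refinement, q1 = q0 + e*(a - q0*b). The AIf32 "epsilon"
+// step bumps q1 by one ulp (an integer add of 1 to the float bits), and the
+// final selb/cgti picks between q1 and the bumped value as a last rounding
+// correction. The v4f32 sequence below is the same, lane by lane.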
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 000000000000..a6e621f36b35
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,159 @@
+//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left, bits shifted out of the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right.
+// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128.
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return (flag isn't quite what it means: the operations are flagged so that
+// instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 000000000000..e2bd2d7f4100
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual issue requirements. The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert( isEvenPlace && "basic block starts at an odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+    // pad: odd(wrong), even(wrong), ...
+ // to: nop(corr), odd(corr), even(corr)...
+ if( isEvenPlace && this_optype == odd && next_optype == even ) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+    // pad: even(wrong), odd(wrong), ...
+ // to: lnop(corr), even(corr), odd(corr)...
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+  // pad the end of the basic block
+ if( !isEvenPlace ){
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
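+  // The itinerary data encodes the issue pipeline as a functional-unit mask:
+  // no units means a pseudo instruction, unit mask 1 is the odd pipeline,
+  // unit mask 2 is the even pipeline; anything else asserts below.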
+ switch( FUs ) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+    default: retval = pseudo;
+             assert( false && "got unknown FuncUnit\n");
+             break;
+  }
+ return retval;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 000000000000..96cde51709ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,664 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+// TO_IMM32 - Convert an i8/i16 to i32.
+def TO_IMM32 : SDNodeXForm<imm, [{
+ return getI32Imm(N->getZExtValue());
+}]>;
+
+// TO_IMM16 - Convert an i8/i32 to i16.
+def TO_IMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
+}]>;
+
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'ldq'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return isUInt<18>(Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+  // lo16 predicate - returns true if the immediate has all zeros in the
+  // high order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have the upper 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return isUInt<18>(Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16Uns16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant vector.
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects this operand to have two
+// components, an offset and a pointer; of these, only the immediate offset
+// is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
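+//
+// For example (illustrative SPU assembly; offset ranges as described above):
+//   lqd $3, 16($sp)      D-form:  signed 10-bit offset from a base register
+//   lqx $3, $4, $5       X-form:  base register plus index register
+//   lqa $3, some_label   A-form:  absolute local store address (256K LSA)
+// The D-form(2) variant uses the 7-bit offset operands defined earlier (for
+// example, the shuffle-address operand shufaddr).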
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 000000000000..19896c0b4be9
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,373 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
+ }
+}
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
+{
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &SPU::R32CRegClass;
+}
+
+const unsigned *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
+ static const unsigned SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+  // Replace the FrameIndex operand with the base register $sp (aka $r1).
+ SPOp.ChangeToRegister(SPU::R1, false);
+
+  // If 'Offset' doesn't fit into the D-form instruction's immediate field,
+  // convert the instruction to X-form. If the instruction is not an AI
+  // (which takes an s10 immediate), assume it is a load/store that can take
+  // an s14 immediate.
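+  // For illustration, with a hypothetical 20000-byte frame offset a D-form
+  // access such as
+  //   lqd $3, 20000($sp)
+  // is rewritten into the X-form sequence
+  //   il  $tmp, 20000
+  //   lqx $3, $tmp, $sp
+  // where $tmp is the scratch register scavenged below.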
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
+
+unsigned
+SPURegisterInfo::getRARegister() const
+{
+ return SPU::R0;
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  // FIXME: Most probably the DWARF numbers differ between Linux and Darwin.
+ return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
+}
+
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+ default: assert( false && "Unhandled D to X-form conversion");
+ }
+ // default will assert, but need to return something to keep the
+ // compiler happy.
+ return dFormOpcode;
+}
+
+// TODO: This was copied from the PPC backend. Could this convenience
+// function be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ assert( Reg && "Register scavenger failed");
+ return Reg;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 000000000000..5e014f8adbfc
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,107 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+      This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const;
+
+ /// After allocating this many registers, the allocator should feel
+ /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non-callee-saved registers in the C calling convention.
+ virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+ MachineFunction &MF) const{
+ return 50;
+ }
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+    //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ //! Get return address register (LR, aka R0)
+ unsigned getRARegister() const;
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+      Converts a register-displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of an s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..e16f51ff0e02
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,183 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying the
+// registers relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers are 128-bit wide entities that can function as
+// general-purpose registers, with scalar operands living in the "preferred
+// slot". The non-volatile registers are allocated in reverse order, as the
+// PPC backend does.
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ (add (sequence "R%u", 0, 79),
+ (sequence "R%u", 127, 80))>;
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+ (add GPRC)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 000000000000..e557ed340a28
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,19 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 000000000000..9cd3c2327df0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,59 @@
+//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units: the SPU's even and odd execution pipelines.
+
+def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7) == 0b000
+def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7) == 0b100
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
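+// Each InstrItinData below binds an itinerary class to a single
+// InstrStage<N, [unit]>: the instruction reserves the named pipeline for N
+// cycles, and that cycle count doubles as a rough latency estimate.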
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..5732fd43cdc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUTargetMachine.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 000000000000..39257d92c400
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUTargetMachine;
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
+ ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..856dc82f786b
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS) :
+ SPUGenSubtargetInfo(TT, CPU, FS),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+ // Should be the target SPU processor type. For now, since there's only
+ // one, simply default to the current "v0" default:
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(default_cpu, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  // CriticalPathRCs is the set of register classes that anti-dependency
+  // breaking is performed for. Do it for all register classes.
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..7c4aa1430217
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,97 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
+namespace llvm {
+ class GlobalValue;
+ class StringRef;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public SPUGenSubtargetInfo {
+ protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame
+ /// on entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+ /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+ bool UseLargeMem;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+    /// subtarget options. The definition of this function is auto-generated
+    /// by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by
+ /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries selected
+    /// for this subtarget.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ /// Use large memory addressing predicate
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
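+    /// Roughly decoded (see the TargetData documentation for the full
+    /// grammar): 'E' selects big-endian, 'p:32:32:128' gives 32-bit pointers
+    /// a 128-bit preferred alignment, the i*/f*/v* entries give scalar and
+    /// vector types 128-bit preferred alignment to match the 128-bit
+    /// registers, and 'n32:64' lists the native integer widths.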
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
+ "-s:128:128-n32:64";
+ }
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..3542a2b87e43
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,67 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeCellSPUTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+}
+
+const std::pair<unsigned, int> *
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ setRelocationModel(Reloc::Static);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(*this));
+ return false;
+}
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+  // Align instructions with nops/lnops for dual issue.
+ PM.add(createSPUNopFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 000000000000..d96f86dcaeb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,90 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+class TargetFrameLowering;
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameLowering FrameLowering;
+ SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+public:
+ SPUTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ virtual const SPUTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 000000000000..049ea236e992
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+ RegisterTarget<Triple::cellspu>
+ X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 000000000000..10d18f61c7e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,1971 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
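+
+// These options take effect when the C++ backend is selected on the llc
+// command line; an illustrative (hypothetical) invocation:
+//   llc -march=cpp -cppgen=function -cppfor=main input.bc -o input.cpp
+// emits C++ API calls that reconstruct only the function named "main".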
+
+extern "C" void LLVMInitializeCppBackendTarget() {
+ // Register the target.
+ RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
+ RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
+ RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
+}
+
+namespace {
+ typedef std::vector<const Type*> TypeList;
+ typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<const Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+ unsigned indent_level;
+
+ public:
+ static char ID;
+ explicit CppWriter(formatted_raw_ostream &o) :
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printCallingConv(CallingConv::ID cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(const Type* val);
+ inline void printCppName(const Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ void printType(const Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(Value*);
+
+ void printModuleBody();
+ };
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
+
+static std::string getTypePrefix(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ default: return "other_";
+ }
+ return "unknown_";
+}
+
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
+
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that the printed form, converted back to floating point,
+// yields the same binary result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+}
+
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+}
+
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: llvm_unreachable("Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+}
+
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
+
+std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+ // See if the type has a name in the symbol table and build accordingly
+ std::string name;
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ name = STy->getName();
+
+ if (name.empty())
+ name = utostr(uniqueNum++);
+
+ name = std::string(prefix) + name;
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
+
+void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
+
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
+
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
+
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
+#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
+ nl(Out);
+ }
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+}
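As a concrete illustration of what printAttributes emits, a function carrying only the nounwind attribute would come out roughly as below. The func_foo name is a placeholder for whatever getCppName assigned, and 4294967295U is simply ~0U printed in decimal, the slot AttrListPtr uses for function-level attributes.

    AttrListPtr func_foo_PAL;
    {
      SmallVector<AttributeWithIndex, 4> Attrs;
      AttributeWithIndex PAWI;
      PAWI.Index = 4294967295U; PAWI.Attrs = 0 | Attribute::NoUnwind;
      Attrs.push_back(PAWI);
      func_foo_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
    }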
+
+void CppWriter::printType(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ printType(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ Out << ");";
+ nl(Out);
+ }
+ printType(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ if (!ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::createNamed(mod->getContext(), \"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+ }
+
+ Out << "std::vector<Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ printType(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ Out << ");";
+ nl(Out);
+ }
+
+ if (ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::get(" << "mod->getContext(), ";
+ } else {
+ Out << typeName << "->setBody(";
+ }
+
+ Out << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Finally, separate the type definition from others with a newline.
+ nl(Out);
+}
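To make the recursion above concrete: for a named, non-packed struct such as %foo = type { i32, i8* }, printType emits code along these lines. This is a sketch; the names are what the naming scheme above would plausibly produce, and the numeric suffix on PointerTy_0 depends on the unique counter.

    StructType *StructTy_foo = StructType::createNamed(mod->getContext(), "foo");
    std::vector<Type*> StructTy_foo_fields;
    StructTy_foo_fields.push_back(IntegerType::get(mod->getContext(), 32));
    PointerType* PointerTy_0 = PointerType::get(IntegerType::get(mod->getContext(), 8), 0);
    StructTy_foo_fields.push_back(PointerTy_0);
    StructTy_foo->setBody(StructTy_foo_fields, /*isPacked=*/false);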
+
+void CppWriter::printTypes(const Module* M) {
+ // Add all of the global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+}
+
+
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or it's already in the constant list, then we've already printed it and
+ // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(mod->getContext(), \"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
+ }
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << ");";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+}
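Two small samples of what printConstant emits, again as an illustrative sketch: the PointerTy_0 type name and the numeric suffixes are placeholders for names produced earlier by the unique counter.

    ConstantInt* const_int32_7 = ConstantInt::get(mod->getContext(),
      APInt(32, StringRef("42"), 10));
    ConstantPointerNull* const_ptr_9 = ConstantPointerNull::get(PointerTy_0);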
+
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+}
+
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ const Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (const Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
+ }
+ }
+}
+
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
+
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+}
+
+std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if it's already in the map of forward references; if so, just return
+ // the name we already set up for it.
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
+ nl(Out);
+ }
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", "
+ << phi->getNumIncomingValues() << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
+ Out << iName << "->addIncoming("
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+}
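For reference, an add feeding a ret inside a block that getCppName called label_entry would be printed roughly as follows; the int32_a and int32_b operand names are placeholders for values defined earlier in the generated code.

    BinaryOperator* int32_sum = BinaryOperator::Create(Instruction::Add,
      int32_a, int32_b, "sum", label_entry);
    ReturnInst::Create(mod->getContext(), int32_sum, label_entry);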
+
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand)) {
+ consts.insert(C);
+ for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ // If the operand references a GVal or Constant, make a note of it
+ Value* operand = C->getOperand(j);
+ printType(operand->getType());
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ if (GenerationType != GenFunction) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+}
+
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
+
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+}
+
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ // Increment so each parameter in the generated signature gets a distinct name.
+ Out << ", Value* arg_" << arg_count++;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
+
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them, so
+ // declare all the functions first before emitting their bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variable declarations. We can't initialize them until
+ // after the constants are printed, so just print a header for each global.
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+}
+
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
+
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+ // Only emit setDataLayout() when the source module actually carries a layout.
+ if (!TheModule->getDataLayout().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
+ nl(Out);
+ ++LI;
+ }
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
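Putting printModule and printModuleBody together, the generated translation unit is a single builder function with the shape sketched below. The module name and target triple are placeholders, and each commented section corresponds to one of the print* passes above.

    Module* makeLLVMModule() {
      // Module Construction
      Module* mod = new Module("example.bc", getGlobalContext());
      mod->setTargetTriple("x86_64-unknown-freebsd");  // only if the input had one
      // Type Definitions             (printTypes)
      // Function Declarations        (printFunctionHead for every function)
      // Global Variable Declarations (printVariableHead for every global)
      // Constant Definitions         (printConstants)
      // Global Variable Definitions  (printVariableBody wires up initializers)
      // Function Definitions         (printFunctionBody for every defined function)
      return mod;
    }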
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
+
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+}
+
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
+
+void CppWriter::printType(const std::string &fname,
+ const std::string &typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
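With this pass hooked in through addPassesToEmitFile, the backend behaves like any other llc target: requesting the "cpp" target registered below (for example, running llc with -march=cpp on a bitcode file) produces a C++ source file built from exactly the calls emitted above, rather than assembly. The generation modes handled in runOnModule (whole program, module contents, a single function, and so on) are selected through the writer's command-line options, which are declared earlier in this file and not shown in this hunk.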
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 000000000000..7322e3e34f00
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ CPPTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
+
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
+
+ virtual const TargetData *getTargetData() const { return 0; }
+};
+
+extern Target TheCppBackendTarget;
+
+} // End llvm namespace
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
new file mode 100644
index 000000000000..d0aeb12499c5
--- /dev/null
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCppBackendTarget;
+
+static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
+  // The C++ backend accepts any triple, but it should not be the default in
+  // most cases.
+ return 1;
+}
+
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+ "C++ backend",
+ &CppBackend_TripleMatchQuality);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..87e7cb5da561
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmParser
+ MBlazeAsmLexer.cpp
+ MBlazeAsmParser.cpp
+ )
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
new file mode 100644
index 000000000000..15965964452a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -0,0 +1,128 @@
+//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+ class MBlazeBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+ protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
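+    // Populate the name-to-number map from the target's register info so that
+    // MatchRegisterName below can resolve identifiers to register numbers.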
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+ public:
+ MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+ };
+
+ class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
+ public:
+ MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : MBlazeBaseAsmLexer(T, MAI) {
+ std::string tripleString("mblaze-unknown-unknown");
+ std::string featureString;
+ std::string CPU;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+ };
+}
+
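+// Lex one token and, when it is an identifier that names a register (matched
+// case-insensitively), re-tag it as a Register token carrying the register number.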
+AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return AsmToken(lexedToken);
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return AsmToken(lexedToken);
+ case AsmToken::Identifier:
+ {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID) {
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ } else {
+ return AsmToken(lexedToken);
+ }
+ }
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer() {
+ RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 000000000000..eebd9d878943
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,567 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeISelLowering.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseImmediate();
+ MBlazeOperand *ParseFsl();
+ MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), Parser(_Parser) {}
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Memory,
+ Fsl
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
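+  // Only the union member selected by Kind is meaningful at any time.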
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ } Mem;
+
+ struct {
+ const MCExpr *Val;
+ } FslImm;
+ };
+
+ MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case Fsl:
+ FslImm = o.FslImm;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getFslImm() const {
+ assert(Kind == Fsl && "Invalid access!");
+ return FslImm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr* getMemOff() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Off;
+ }
+
+ unsigned getMemOffReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.OffReg;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { return Kind == Memory; }
+ bool isFsl() const { return Kind == Fsl; }
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as an immediate when possible; treat a null MCExpr as 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFslOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getFslImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ Inst.addOperand(MCOperand::CreateReg(RegOff));
+ else
+ addExpr(Inst, getMemOff());
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+ MBlazeOperand *Op = new MBlazeOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Fsl);
+    Op->FslImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->Mem.OffReg = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.OffReg = Off;
+ Op->Mem.Off = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Memory: {
+ OS << "<memory R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+ OS << ", ";
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+ else
+ OS << getMemOff();
+ OS << ">";
+ }
+ break;
+ case Fsl:
+ getFslImm()->print(OS);
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ SMLoc ErrorLoc;
+ unsigned ErrorInfo;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
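+// Fold the two trailing operands (a base register and a register-or-immediate
+// offset) into a single memory operand, replacing them in the operand list.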
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Operands.size() != 4)
+ return 0;
+
+ MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+ MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+ SMLoc S = Base.getStartLoc();
+ SMLoc O = Offset.getStartLoc();
+ SMLoc E = Offset.getEndLoc();
+
+ if (!Base.isReg()) {
+ Error(S, "base address must be a register");
+ return 0;
+ }
+
+ if (!Offset.isReg() && !Offset.isImm()) {
+ Error(O, "offset must be a register or immediate");
+ return 0;
+ }
+
+ MBlazeOperand *Op;
+ if (Offset.isReg())
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+ else
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+ delete Operands.pop_back_val();
+ delete Operands.pop_back_val();
+ Operands.push_back(Op);
+
+ return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ return (ParseRegister(RegNo) == 0);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, S, E);
+ }
+}
+
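+// Map an identifier of the form "rfslN" to its FSL port number. Anything else
+// yields a value of 16 or more, which the caller rejects.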
+static unsigned MatchFslRegister(StringRef String) {
+ if (!String.startswith("rfsl"))
+ return -1;
+
+ unsigned regNum;
+ if (String.substr(4).getAsInteger(10,regNum))
+ return -1;
+
+ return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+ if (reg >= 16)
+ return 0;
+
+ getLexer().Lex();
+ const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+ return MBlazeOperand::CreateFslImm(EVal,S,E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ const MCExpr *EVal;
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ if (getParser().ParseExpression(EVal))
+ return 0;
+
+ return MBlazeOperand::CreateImm(EVal, S, E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MBlazeOperand *Op;
+
+ // Attempt to parse the next token as a register name
+ unsigned RegNo;
+ Op = ParseRegister(RegNo);
+
+ // Attempt to parse the next token as an FSL immediate
+ if (!Op)
+ Op = ParseFsl();
+
+ // Attempt to parse the next token as an immediate
+ if (!Op)
+ Op = ParseImmediate();
+
+ // If the token could not be parsed then fail
+ if (!Op) {
+ Error(Parser.getTok().getLoc(), "unknown operand");
+ return 0;
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+ return Op;
+}
+
+/// Parse an MBlaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The first operand is the token for the instruction name.
+ size_t dotLoc = Name.find('.');
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+ if (dotLoc < Name.size())
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (!ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (!ParseOperand(Operands))
+ return true;
+ }
+
+ // If the instruction requires a memory operand then we need to
+ // replace the last two operands (base+offset) with a single
+ // memory operand.
+ if (Name.startswith("lw") || Name.startswith("sw") ||
+ Name.startswith("lh") || Name.startswith("sh") ||
+ Name.startswith("lb") || Name.startswith("sb"))
+ return (ParseMemory(Operands) == NULL);
+
+ return false;
+}
+
+/// ParseDirective parses the MBlaze-specific directives.
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+ RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
new file mode 100644
index 000000000000..611a0f473f73
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmParser
+
+# Hack: we need to include 'main' MBlaze target directory for private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..9376e68a35cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeDisassembler
+ MBlazeDisassembler.cpp
+ )
+
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+set_property(
+ SOURCE MBlazeDisassembler.cpp
+ PROPERTY COMPILE_FLAGS "/Od"
+ )
+endif()
+
+add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 000000000000..88d80a12eb3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,707 @@
+//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeDisassembler.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+#include "MBlazeGenEDInfo.inc"
+
+namespace llvm {
+extern MCInstrDesc MBlazeInsts[];
+}
+
+using namespace llvm;
+
+const unsigned UNSUPPORTED = -1;
+
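+// Primary opcode table, indexed by the major opcode field ((insn >> 26) & 0x3F).
+// Entries that cover several instructions are refined further by the decode*
+// helpers via getOPCODE() below.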
+static unsigned mblazeBinary2Opcode[] = {
+ MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
+ MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
+ MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
+ MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+ MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13
+ UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17
+ MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B
+ UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F
+
+ MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23
+ MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27
+ MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B
+ MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F
+
+ MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33
+ MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37
+ MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B
+ MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F
+};
+
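+// Field extraction helpers: rD, rA and rB are 5-bit register fields at bit
+// offsets 21, 16 and 11; each helper returns UNSUPPORTED when the field cannot
+// be mapped to a register.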
+static unsigned getRD(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+ if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+ if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+ return UNSUPPORTED;
+ return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+ int16_t val = (insn & 0xFFFF);
+ return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+ int16_t val = (insn & 0x1F);
+ return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+ return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+ int16_t val = (insn & 0xF);
+ return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0: return MBlaze::MUL;
+ case 1: return MBlaze::MULH;
+ case 2: return MBlaze::MULHSU;
+ case 3: return MBlaze::MULHU;
+ }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+ switch (insn&0x7FF) {
+ default: return UNSUPPORTED;
+ case 0x60: return MBlaze::SEXT8;
+ case 0x68: return MBlaze::WIC;
+ case 0x64: return MBlaze::WDC;
+ case 0x66: return MBlaze::WDCC;
+ case 0x74: return MBlaze::WDCF;
+ case 0x61: return MBlaze::SEXT16;
+ case 0x41: return MBlaze::SRL;
+ case 0x21: return MBlaze::SRC;
+ case 0x01: return MBlaze::SRA;
+ }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQ;
+ case 0x10: return MBlaze::BEQD;
+ case 0x05: return MBlaze::BGE;
+ case 0x15: return MBlaze::BGED;
+ case 0x04: return MBlaze::BGT;
+ case 0x14: return MBlaze::BGTD;
+ case 0x03: return MBlaze::BLE;
+ case 0x13: return MBlaze::BLED;
+ case 0x02: return MBlaze::BLT;
+ case 0x12: return MBlaze::BLTD;
+ case 0x01: return MBlaze::BNE;
+ case 0x11: return MBlaze::BNED;
+ }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQI;
+ case 0x10: return MBlaze::BEQID;
+ case 0x05: return MBlaze::BGEI;
+ case 0x15: return MBlaze::BGEID;
+ case 0x04: return MBlaze::BGTI;
+ case 0x14: return MBlaze::BGTID;
+ case 0x03: return MBlaze::BLEI;
+ case 0x13: return MBlaze::BLEID;
+ case 0x02: return MBlaze::BLTI;
+ case 0x12: return MBlaze::BLTID;
+ case 0x01: return MBlaze::BNEI;
+ case 0x11: return MBlaze::BNEID;
+ }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BR;
+ case 0x08: return MBlaze::BRA;
+ case 0x0C: return MBlaze::BRK;
+ case 0x10: return MBlaze::BRD;
+ case 0x14: return MBlaze::BRLD;
+ case 0x18: return MBlaze::BRAD;
+ case 0x1C: return MBlaze::BRALD;
+ }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BRI;
+ case 0x08: return MBlaze::BRAI;
+ case 0x0C: return MBlaze::BRKI;
+ case 0x10: return MBlaze::BRID;
+ case 0x14: return MBlaze::BRLID;
+ case 0x18: return MBlaze::BRAID;
+ case 0x1C: return MBlaze::BRALID;
+ }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLL;
+ case 0x1: return MBlaze::BSRA;
+ case 0x0: return MBlaze::BSRL;
+ }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLLI;
+ case 0x1: return MBlaze::BSRAI;
+ case 0x0: return MBlaze::BSRLI;
+ }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::RSUBK;
+ case 0x1: return MBlaze::CMP;
+ case 0x3: return MBlaze::CMPU;
+ }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::FADD;
+ case 0x080: return MBlaze::FRSUB;
+ case 0x100: return MBlaze::FMUL;
+ case 0x180: return MBlaze::FDIV;
+ case 0x200: return MBlaze::FCMP_UN;
+ case 0x210: return MBlaze::FCMP_LT;
+ case 0x220: return MBlaze::FCMP_EQ;
+ case 0x230: return MBlaze::FCMP_LE;
+ case 0x240: return MBlaze::FCMP_GT;
+ case 0x250: return MBlaze::FCMP_NE;
+ case 0x260: return MBlaze::FCMP_GE;
+ case 0x280: return MBlaze::FLT;
+ case 0x300: return MBlaze::FINT;
+ case 0x380: return MBlaze::FSQRT;
+ }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+ switch ((insn>>10)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GET;
+ case 0x01: return MBlaze::EGET;
+ case 0x02: return MBlaze::AGET;
+ case 0x03: return MBlaze::EAGET;
+ case 0x04: return MBlaze::TGET;
+ case 0x05: return MBlaze::TEGET;
+ case 0x06: return MBlaze::TAGET;
+ case 0x07: return MBlaze::TEAGET;
+ case 0x08: return MBlaze::CGET;
+ case 0x09: return MBlaze::ECGET;
+ case 0x0A: return MBlaze::CAGET;
+ case 0x0B: return MBlaze::ECAGET;
+ case 0x0C: return MBlaze::TCGET;
+ case 0x0D: return MBlaze::TECGET;
+ case 0x0E: return MBlaze::TCAGET;
+ case 0x0F: return MBlaze::TECAGET;
+ case 0x10: return MBlaze::NGET;
+ case 0x11: return MBlaze::NEGET;
+ case 0x12: return MBlaze::NAGET;
+ case 0x13: return MBlaze::NEAGET;
+ case 0x14: return MBlaze::TNGET;
+ case 0x15: return MBlaze::TNEGET;
+ case 0x16: return MBlaze::TNAGET;
+ case 0x17: return MBlaze::TNEAGET;
+ case 0x18: return MBlaze::NCGET;
+ case 0x19: return MBlaze::NECGET;
+ case 0x1A: return MBlaze::NCAGET;
+ case 0x1B: return MBlaze::NECAGET;
+ case 0x1C: return MBlaze::TNCGET;
+ case 0x1D: return MBlaze::TNECGET;
+ case 0x1E: return MBlaze::TNCAGET;
+ case 0x1F: return MBlaze::TNECAGET;
+ case 0x20: return MBlaze::PUT;
+ case 0x22: return MBlaze::APUT;
+ case 0x24: return MBlaze::TPUT;
+ case 0x26: return MBlaze::TAPUT;
+ case 0x28: return MBlaze::CPUT;
+ case 0x2A: return MBlaze::CAPUT;
+ case 0x2C: return MBlaze::TCPUT;
+ case 0x2E: return MBlaze::TCAPUT;
+ case 0x30: return MBlaze::NPUT;
+ case 0x32: return MBlaze::NAPUT;
+ case 0x34: return MBlaze::TNPUT;
+ case 0x36: return MBlaze::TNAPUT;
+ case 0x38: return MBlaze::NCPUT;
+ case 0x3A: return MBlaze::NCAPUT;
+ case 0x3C: return MBlaze::TNCPUT;
+ case 0x3E: return MBlaze::TNCAPUT;
+ }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+ switch ((insn>>5)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GETD;
+ case 0x01: return MBlaze::EGETD;
+ case 0x02: return MBlaze::AGETD;
+ case 0x03: return MBlaze::EAGETD;
+ case 0x04: return MBlaze::TGETD;
+ case 0x05: return MBlaze::TEGETD;
+ case 0x06: return MBlaze::TAGETD;
+ case 0x07: return MBlaze::TEAGETD;
+ case 0x08: return MBlaze::CGETD;
+ case 0x09: return MBlaze::ECGETD;
+ case 0x0A: return MBlaze::CAGETD;
+ case 0x0B: return MBlaze::ECAGETD;
+ case 0x0C: return MBlaze::TCGETD;
+ case 0x0D: return MBlaze::TECGETD;
+ case 0x0E: return MBlaze::TCAGETD;
+ case 0x0F: return MBlaze::TECAGETD;
+ case 0x10: return MBlaze::NGETD;
+ case 0x11: return MBlaze::NEGETD;
+ case 0x12: return MBlaze::NAGETD;
+ case 0x13: return MBlaze::NEAGETD;
+ case 0x14: return MBlaze::TNGETD;
+ case 0x15: return MBlaze::TNEGETD;
+ case 0x16: return MBlaze::TNAGETD;
+ case 0x17: return MBlaze::TNEAGETD;
+ case 0x18: return MBlaze::NCGETD;
+ case 0x19: return MBlaze::NECGETD;
+ case 0x1A: return MBlaze::NCAGETD;
+ case 0x1B: return MBlaze::NECAGETD;
+ case 0x1C: return MBlaze::TNCGETD;
+ case 0x1D: return MBlaze::TNECGETD;
+ case 0x1E: return MBlaze::TNCAGETD;
+ case 0x1F: return MBlaze::TNECAGETD;
+ case 0x20: return MBlaze::PUTD;
+ case 0x22: return MBlaze::APUTD;
+ case 0x24: return MBlaze::TPUTD;
+ case 0x26: return MBlaze::TAPUTD;
+ case 0x28: return MBlaze::CPUTD;
+ case 0x2A: return MBlaze::CAPUTD;
+ case 0x2C: return MBlaze::TCPUTD;
+ case 0x2E: return MBlaze::TCAPUTD;
+ case 0x30: return MBlaze::NPUTD;
+ case 0x32: return MBlaze::NAPUTD;
+ case 0x34: return MBlaze::TNPUTD;
+ case 0x36: return MBlaze::TNAPUTD;
+ case 0x38: return MBlaze::NCPUTD;
+ case 0x3A: return MBlaze::NCAPUTD;
+ case 0x3C: return MBlaze::TNCPUTD;
+ case 0x3E: return MBlaze::TNCAPUTD;
+ }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+ switch (insn&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::IDIV;
+ case 0x2: return MBlaze::IDIVU;
+ }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LBU;
+ case 0x1: return MBlaze::LBUR;
+ }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LHU;
+ case 0x1: return MBlaze::LHUR;
+ }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LW;
+ case 0x1: return MBlaze::LWR;
+ case 0x2: return MBlaze::LWX;
+ }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SB;
+ case 0x1: return MBlaze::SBR;
+ }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SH;
+ case 0x1: return MBlaze::SHR;
+ }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SW;
+ case 0x1: return MBlaze::SWR;
+ case 0x2: return MBlaze::SWX;
+ }
+}
+
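+// MFS, MTS, MSRSET and MSRCLR share a major opcode; bits 14..16 of the
+// encoding select among them.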
+static unsigned decodeMFS(uint32_t insn) {
+ switch ((insn>>15)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0:
+ switch ((insn>>16)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MSRSET;
+ case 0x1: return MBlaze::MSRCLR;
+ }
+ case 0x1:
+ switch ((insn>>14)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MFS;
+ case 0x1: return MBlaze::MTS;
+ }
+ }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::OR;
+ case 0x400: return MBlaze::PCMPBF;
+ }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::XOR;
+ case 0x400: return MBlaze::PCMPEQ;
+ }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::ANDN;
+ case 0x400: return MBlaze::PCMPNE;
+ }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x10: return MBlaze::RTSD;
+ case 0x11: return MBlaze::RTID;
+ case 0x12: return MBlaze::RTBD;
+ case 0x14: return MBlaze::RTED;
+ }
+}
+
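+// Resolve the final MCInst opcode: look up the major opcode and, where the
+// table entry covers several instructions, dispatch to the matching decode*
+// helper.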
+static unsigned getOPCODE(uint32_t insn) {
+ unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+ switch (opcode) {
+ case MBlaze::MUL: return decodeMUL(insn);
+ case MBlaze::SEXT8: return decodeSEXT(insn);
+ case MBlaze::BEQ: return decodeBEQ(insn);
+ case MBlaze::BEQI: return decodeBEQI(insn);
+ case MBlaze::BR: return decodeBR(insn);
+ case MBlaze::BRI: return decodeBRI(insn);
+ case MBlaze::BSRL: return decodeBSRL(insn);
+ case MBlaze::BSRLI: return decodeBSRLI(insn);
+ case MBlaze::RSUBK: return decodeRSUBK(insn);
+ case MBlaze::FADD: return decodeFADD(insn);
+ case MBlaze::GET: return decodeGET(insn);
+ case MBlaze::GETD: return decodeGETD(insn);
+ case MBlaze::IDIV: return decodeIDIV(insn);
+ case MBlaze::LBU: return decodeLBU(insn);
+ case MBlaze::LHU: return decodeLHU(insn);
+ case MBlaze::LW: return decodeLW(insn);
+ case MBlaze::SB: return decodeSB(insn);
+ case MBlaze::SH: return decodeSH(insn);
+ case MBlaze::SW: return decodeSW(insn);
+ case MBlaze::MFS: return decodeMFS(insn);
+ case MBlaze::OR: return decodeOR(insn);
+ case MBlaze::XOR: return decodeXOR(insn);
+ case MBlaze::ANDN: return decodeANDN(insn);
+ case MBlaze::RTSD: return decodeRTSD(insn);
+ default: return opcode;
+ }
+}
+
+EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+ return instInfoMBlaze;
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool MBlazeDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint64_t read;
+ uint8_t bytes[4];
+
+ // By default we consume 1 byte on failure
+ size = 1;
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
+ return false;
+
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+ // Get the MCInst opcode from the binary instruction and make sure
+ // that it is a valid instruction.
+ unsigned opcode = getOPCODE(insn);
+ if (opcode == UNSUPPORTED)
+ return false;
+
+ instr.setOpcode(opcode);
+
+ unsigned RD = getRD(insn);
+ unsigned RA = getRA(insn);
+ unsigned RB = getRB(insn);
+ unsigned RS = getRS(insn);
+
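+  // The instruction format (taken from TSFlags) determines which fields become
+  // MCInst operands and in what order.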
+ uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
+ switch ((tsFlags & MBlazeII::FormMask)) {
+ default:
+ return false;
+
+ case MBlazeII::FRRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRI:
+ switch (opcode) {
+ default:
+ return false;
+ case MBlaze::MFS:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ break;
+ case MBlaze::MTS:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ case MBlaze::MSRSET:
+ case MBlaze::MSRCLR:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+ break;
+ }
+ break;
+
+ case MBlazeII::FRRI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ switch (opcode) {
+ default:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+ case MBlaze::BSRLI:
+ case MBlaze::BSRAI:
+ case MBlaze::BSLLI:
+ instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+ break;
+ }
+ break;
+
+ case MBlazeII::FCRR:
+ if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCRI:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRCR:
+ if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRCI:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FCCR:
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FCCI:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRRCI:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+ break;
+
+ case MBlazeII::FRRC:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FRCX:
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FRCS:
+ if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RS));
+ break;
+
+ case MBlazeII::FCRCS:
+ if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RS));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
+ case MBlazeII::FCRCX:
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCX:
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCR:
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
+ break;
+
+ case MBlazeII::FRIR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+ }
+
+ // We always consume 4 bytes of data on success
+ size = 4;
+
+ return true;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T) {
+ return new MBlazeDisassembler;
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+ createMBlazeDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 000000000000..d05eced0bacf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,55 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It is the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MBlazeDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~MBlazeDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
new file mode 100644
index 000000000000..0530b3286bc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDisassembler
+
+# Hack: we need to include 'main' MBlaze target directory to grab headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..242a573036e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeInstPrinter.cpp
+ )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 000000000000..a7fd287990b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,69 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << "rfsl" << MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, OpNo, O, NULL);
+ O << ", ";
+ printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 000000000000..eacca410b986
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MBlazeInstPrinter : public MCInstPrinter {
+ public:
+ MBlazeInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+ const char *Modifier = 0);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
new file mode 100644
index 000000000000..9fb6e869d945
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 000000000000..3390794c9375
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,42 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class MCCodeEmitter;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
+ class TargetAsmBackend;
+ class formatted_raw_ostream;
+
+ MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+ TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.td b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 000000000000..1245658d29ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,73 @@
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+ let AssemblyWriters = [MBlazeAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
new file mode 100644
index 000000000000..08f14c365957
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -0,0 +1,162 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "MBlaze.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_Data_1: return 1;
+ case FK_PCRel_2:
+ case FK_Data_2: return 2;
+ case FK_PCRel_4:
+ case FK_Data_4: return 4;
+ case FK_Data_8: return 8;
+ }
+}
+
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MBlazeELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public TargetAsmBackend {
+public:
+ MBlazeAsmBackend(const Target &T)
+ : TargetAsmBackend() {
+ }
+
+ unsigned getNumFixupKinds() const {
+ return 2;
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ unsigned getPointerSize() const {
+ return 4;
+ }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case MBlaze::ADDIK: return MBlaze::ADDIK32;
+ case MBlaze::ORI: return MBlaze::ORI32;
+ case MBlaze::BRLID: return MBlaze::BRLID32;
+ }
+}
+
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+ bool hasExprOrImm = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+ return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ Res = Inst;
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
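+
+// Relaxation note: MayNeedRelaxation above only returns true for
+// ADDIK/ORI/BRLID whose operands are still unresolved symbolic expressions.
+// The *32 opcodes they relax to are presumably the forms that carry a full
+// 32-bit immediate via an IMM prefix (MBlazeDelaySlotFiller.cpp notes that
+// immediates wider than 16 bits need an implicit IMM instruction).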
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 4) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->Write32(0x00000000);
+
+ return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+ : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ false);
+ }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ char *data = Data + Fixup.getOffset();
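+
+ // Note on the offsets below: the 2- and 4-byte cases do not patch the bytes
+ // at the start of the fixup range. The layout appears to assume big-endian
+ // words, with a 4-byte value split between the 16-bit immediate fields of
+ // an IMM prefix and the following instruction; this is inferred from the
+ // offsets rather than documented here.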
+ switch (Size) {
+ default: llvm_unreachable("Cannot fixup unknown value.");
+ case 1: llvm_unreachable("Cannot fixup 1 byte value.");
+ case 8: llvm_unreachable("Cannot fixup 8 byte value.");
+
+ case 4:
+ *(data+7) = uint8_t(Value);
+ *(data+6) = uint8_t(Value >> 8);
+ *(data+3) = uint8_t(Value >> 16);
+ *(data+2) = uint8_t(Value >> 24);
+ break;
+
+ case 2:
+ *(data+3) = uint8_t(Value >> 0);
+ *(data+2) = uint8_t(Value >> 8);
+ }
+}
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ assert(0 && "Mac not supported on MBlaze");
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on MBlaze");
+
+ return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 000000000000..0016df569b93
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,335 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ void printSavedRegsBitmask();
+ void emitFrameDirective();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little-endian bitset indicating which registers are
+// saved in the function prologue (e.g. with a 0x00008000 mask, the
+// assembler knows that register 15 (R15, the return address register)
+// is saved in the prologue).
+// offset - the position before the stack pointer subtraction indicating
+// where the first saved register of the prologue is located
+// (e.g. with a -8 offset and a 48-byte frame, the first register
+// in the mask is stored at 48-8=40).
+//
+// Consider the following function prologue:
+//
+//   .frame R19,48,R15
+//   .mask  0x00088000,-8
+//   addik  R1, R1, -48
+//   swi    R15, R1, 40
+//   swi    R19, R1, 36
+//
+// With a 0x00088000 mask, the assembler knows that registers 15 (R15) and
+// 19 (R19) are saved in the prologue. As the save order in the prologue is
+// from left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32 bit hex number, zero-padded to eight digits.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee-saved CPU registers. The return address
+// and frame pointer registers are also added to the mask when necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ if (MBlaze::GPRRegisterClass->contains(Reg))
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (TFI->hasFP(*MF))
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->adjustsStack())
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
+}
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned stkReg = RI.getFrameRegister(*MF);
+ unsigned retReg = RI.getRARegister();
+ unsigned stkSze = MF->getFrameInfo()->getStackSize();
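+
+ // With the values from the example in the directive comment block above,
+ // this emits something like "\t.frame\tr19,48,r15" (the exact register
+ // spelling comes from MBlazeInstPrinter::getRegisterName; illustrative only).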
+
+ OutStreamer.EmitRawText("\t.frame\t" +
+ Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+ "," + Twine(stkSze) + "," +
+ Twine(MBlazeInstPrinter::getRegisterName(retReg)));
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int32_t)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP *fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
+
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 000000000000..4962573f96ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,28 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
+
+def CC_MBlaze : CallingConv<[
+ CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+ CCIfType<[i32,f32], CCAssignToStack<4, 4>>
+]>;
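+
+// Note: CC_MBlaze_AssignReg is a custom assigner implemented in C++ (it is
+// declared in MBlazeISelLowering.cpp). Arguments it declines fall through to
+// the 4-byte aligned stack slot rule above. Judging by the frame lowering
+// code in this patch, the argument registers it hands out are R5-R10, but
+// that detail lives on the C++ side rather than in this file.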
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 000000000000..c07570a487b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,258 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that attempts to fill delay slots with useful instructions. If no
+// instruction can be moved into the delay slot, then a NOP is placed there.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace llvm {
+cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mblaze-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the MBlaze delay slot filter."),
+ cl::Hidden);
+}
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+ // Any instruction with an immediate operand that cannot be represented in
+ // 16 bits requires an implicit IMM instruction.
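+ // For example, something like "addik r5, r0, 100000" (hypothetical
+ // operands) cannot encode 100000 in the signed 16-bit range of
+ // -0x8000..0x7fff, so an IMM would have to be emitted first.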
+ unsigned numOper = candidate->getNumOperands();
+ for (unsigned op = 0; op < numOper; ++op) {
+ MachineOperand &mop = candidate->getOperand(op);
+
+ // The operand requires more than 16-bits to represent.
+ if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+ return true;
+
+ // We must assume that unknown immediate values require more than
+ // 16-bits to represent.
+ if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
+ return true;
+
+ // FIXME: we could probably check to see if the FP value happens
+ // to not need an IMM instruction. For now we just always
+ // assume that FP values do.
+ if (mop.isFPImm())
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+ switch (instr->getOpcode()) {
+ default: return instr->getNumOperands();
+
+ // These instructions have a variable number of operands but the first two
+ // are the "real" operands that we care about during hazard detection.
+ case MBlaze::BRLID:
+ case MBlaze::BRALID:
+ case MBlaze::BRLD:
+ case MBlaze::BRALD:
+ return 2;
+ }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+ MachineBasicBlock::iterator &slot) {
+ // Hazard check
+ MachineBasicBlock::iterator a = candidate;
+ MachineBasicBlock::iterator b = slot;
+ MCInstrDesc desc = candidate->getDesc();
+
+ // MBB layout:-
+ // candidate := a0 = operation(a1, a2)
+ // ...middle bit...
+ // slot := b0 = operation(b1, b2)
+
+ // Possible hazards:-
+ // 1. a1 or a2 was written during the middle bit
+ // 2. a0 was read or written during the middle bit
+ // 3. a0 is one or more of {b0, b1, b2}
+ // 4. b0 is one or more of {a1, a2}
+ // 5. a accesses memory, and the middle bit
+ // contains a store operation.
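+ //
+ // A concrete (hypothetical) type 3 case: if the candidate were
+ //   add r3, r4, r5     (a0 = r3)
+ // and the slot instruction were
+ //   brld r15, r3       (b1 = r3)
+ // then the branch would read r3 before the relocated add writes it, so the
+ // candidate must be rejected.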
+ bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+ // Determine the number of operands in the slot instruction and in the
+ // candidate instruction.
+ const unsigned aend = getLastRealOperand(a);
+ const unsigned bend = getLastRealOperand(b);
+
+ // Check hazards type 1, 2 and 5 by scanning the middle bit
+ MachineBasicBlock::iterator m = a;
+ for (++m; m != b; ++m) {
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ bool aop_is_reg = a->getOperand(aop).isReg();
+ if (!aop_is_reg) continue;
+
+ bool aop_is_def = a->getOperand(aop).isDef();
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ const unsigned mend = getLastRealOperand(m);
+ for (unsigned mop = 0; mop<mend; ++mop) {
+ bool mop_is_reg = m->getOperand(mop).isReg();
+ if (!mop_is_reg) continue;
+
+ bool mop_is_def = m->getOperand(mop).isDef();
+ unsigned mop_reg = m->getOperand(mop).getReg();
+
+ if (aop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 2, because aop = a0
+ else if (mop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 1, because aop in {a1, a2}
+ }
+ }
+
+ // Check hazard type 5
+ if (a_is_memory && m->getDesc().mayStore())
+ return true;
+ }
+
+ // Check hazard type 3 & 4
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ if (a->getOperand(aop).isReg()) {
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ for (unsigned bop = 0; bop<bend; ++bop) {
+ if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+ unsigned bop_reg = b->getOperand(bop).getReg();
+ if (aop_reg == bop_reg)
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate) {
+ if (candidate == MBB.begin())
+ return false;
+
+ MCInstrDesc brdesc = (--candidate)->getDesc();
+ return (brdesc.hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+ if (!I->hasUnmodeledSideEffects())
+ return false;
+
+ unsigned op = I->getOpcode();
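+
+ // The arithmetic opcodes below are exempted even though they are flagged
+ // with unmodeled side effects; the side effect in question is presumably
+ // their interaction with the carry bit in MSR, which this pass treats as
+ // safe for delay slot purposes.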
+ if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+ op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+ op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+ op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+ op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+ op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+ return false;
+
+ return true;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+ MachineBasicBlock::iterator I = slot;
+ while (true) {
+ if (I == MBB.begin())
+ break;
+
+ --I;
+ MCInstrDesc desc = I->getDesc();
+ if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
+ desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ hasUnknownSideEffects(I))
+ break;
+
+ if (hasImmInstruction(I) || delayHasHazard(I,slot))
+ continue;
+
+ return I;
+ }
+
+ return MBB.end();
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB,I);
+
+ ++FilledSlots;
+ Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
new file mode 100644
index 000000000000..3f26ed15b284
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeELFWriterInfo.h"
+#include "MBlazeRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MBlazeELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {}
+
+unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case MBlaze::reloc_pcrel_word:
+ return ELF::R_MICROBLAZE_64_PCREL;
+ case MBlaze::reloc_absolute_word:
+ return ELF::R_MICROBLAZE_NONE;
+ default:
+ llvm_unreachable("unknown mblaze machine relocation type");
+ }
+ return 0;
+}
+
+long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ return Modifier - 4;
+ case ELF::R_MICROBLAZE_32:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown mblaze relocation type");
+ }
+ return 0;
+}
+
+unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ // FIXME: Most of these sizes are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32:
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_32_LO:
+ case ELF::R_MICROBLAZE_SRO32:
+ case ELF::R_MICROBLAZE_SRW32:
+ case ELF::R_MICROBLAZE_32_SYM_OP_SYM:
+ case ELF::R_MICROBLAZE_GOTOFF_32:
+ return 32;
+
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_64:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ case ELF::R_MICROBLAZE_GOT_64:
+ case ELF::R_MICROBLAZE_PLT_64:
+ case ELF::R_MICROBLAZE_GOTOFF_64:
+ return 64;
+ }
+
+ return 0;
+}
+
+bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ // FIXME: Most of these are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return MBlaze::reloc_absolute_word;
+}
+
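+// Illustration of the PC-relative case below: for a symbol at offset 0x100
+// and a relocation site at offset 0x10, the computed value is
+// 0x100 - (0x10 + 4) = 0xEC, i.e. the displacement is measured from the end
+// of the 4-byte word at the relocation offset. (The offsets are made up.)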
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ if (RelTy == ELF::R_MICROBLAZE_32_PCREL ||
+     RelTy == ELF::R_MICROBLAZE_64_PCREL)
+   return SymOffset - (RelOffset + 4);
+ else
+   assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 000000000000..63bfc0da745a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ MBlazeELFWriterInfo(TargetMachine &TM);
+ virtual ~MBlazeELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+ /// to reference an absolute label.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 000000000000..e7639025cf1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> DisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
+}
+
+static void replaceFrameIndexes(MachineFunction &MF,
+ SmallVector<std::pair<int,int64_t>, 16> &FR) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+ SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+ for (; FRI != FRE; ++FRI) {
+ MFI->RemoveStackObject(FRI->first);
+ int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+ MBlazeFI->recordReplacement(FRI->first, NFI);
+
+ for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+ MachineBasicBlock::iterator MBB = MB->begin();
+ const MachineBasicBlock::iterator MBE = MB->end();
+
+ for (; MBB != MBE; ++MBB) {
+ MachineInstr::mop_iterator MIB = MBB->operands_begin();
+ const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+ for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+ if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+ DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+ MII->setIndex(NFI);
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer in
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+ if (DisableStackAdjust) return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+ MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+ const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+ SmallVector<MachineInstr*, 16> EraseInstr;
+ SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+ MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+ MachineBasicBlock::iterator MIB = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end();
+
+ int StackAdjust = 0;
+ int StackOffset = -28;
+
+ // In this loop we are searching for frame indexes that correspond to incoming
+ // arguments that are already in the stack. We look for instruction sequences
+ // like the following:
+ //
+ // LWI REG, FI1, 0
+ // ...
+ // SWI REG, FI2, 0
+ //
+ // As long as there are no defs of REG in the ... part, we can eliminate
+ // the SWI instruction because the value has already been stored to the
+ // stack by the caller. All we need to do is locate FI at the correct
+ // stack location according to the calling conventions.
+ //
+ // Additionally, if the SWI operation kills the def of REG then we don't
+ // need the LWI operation so we can erase it as well.
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+ unsigned FIReg = I->getOperand(0).getReg();
+ MachineBasicBlock::iterator SI = I;
+ for (SI++; SI != MIE; ++SI) {
+ if (!SI->getOperand(0).isReg() ||
+ !SI->getOperand(1).isFI() ||
+ SI->getOpcode() != MBlaze::SWI) continue;
+
+ int FI = SI->getOperand(1).getIndex();
+ if (SI->getOperand(0).getReg() != FIReg ||
+ MFI->isFixedObjectIndex(FI) ||
+ MFI->getObjectSize(FI) != 4) continue;
+
+ if (SI->getOperand(0).isDef()) break;
+
+ if (SI->getOperand(0).isKill()) {
+ DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+ << " removed\n");
+ EraseInstr.push_back(I);
+ }
+
+ EraseInstr.push_back(SI);
+ DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+ FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+ StackOffset -= 4;
+ StackAdjust += 4;
+ break;
+ }
+ }
+ }
+
+ // In this loop we are searching for frame indexes that correspond to
+ // incoming arguments that are in registers. We look for instruction
+ // sequences like the following:
+ //
+ // ... SWI REG, FI, 0
+ //
+ // As long as the ... part does not define REG and if REG is an incoming
+ // parameter register then we know that, according to ABI conventions, the
+ // caller has allocated stack space for it already. Instead of allocating
+ // stack space on our frame, we record the correct location in the caller's
+ // frame.
+ for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->definesRegister(LI->first))
+ break;
+
+ if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() < 0) continue;
+
+ if (I->getOperand(0).getReg() == LI->first) {
+ int FI = I->getOperand(1).getIndex();
+ MBlazeFI->recordLiveIn(FI);
+
+ int FILoc = 0;
+ switch (LI->first) {
+ default: llvm_unreachable("invalid incoming parameter!");
+ case MBlaze::R5: FILoc = -4; break;
+ case MBlaze::R6: FILoc = -8; break;
+ case MBlaze::R7: FILoc = -12; break;
+ case MBlaze::R8: FILoc = -16; break;
+ case MBlaze::R9: FILoc = -20; break;
+ case MBlaze::R10: FILoc = -24; break;
+ }
+
+ StackAdjust += 4;
+ FrameRelocate.push_back(std::make_pair(FI,FILoc));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+ break;
+ }
+ }
+ }
+
+ // Go ahead and erase all of the instructions that we determined were
+ // no longer needed.
+ for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+ MBB->erase(EraseInstr[i]);
+
+ // Replace all of the frame indexes that we have relocated with new
+ // fixed object frame indexes.
+ replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+ // If this function is not using either the interrupt_handler
+ // calling convention or the save_volatiles calling convention
+ // then we don't need to do any additional frame layout.
+ if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+ CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ // Determine if the calling convention is the interrupt_handler
+ // calling convention. Some pieces of the prologue and epilogue
+ // only need to be emitted if we are lowering an interrupt handler.
+ bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine where to put prologue and epilogue additions
+ MachineBasicBlock &MENT = MF.front();
+ MachineBasicBlock &MEXT = MF.back();
+
+ MachineBasicBlock::iterator MENTI = MENT.begin();
+ MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+ DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+ DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+ // Store the frame indexes generated during prologue additions for use
+ // when we are generating the epilogue additions.
+ SmallVector<int, 10> VFI;
+
+ // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+ // always have a SWI because it is used when processing RMSR.
+ for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+ if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+ int FI = MFI->CreateStackObject(4,4,false,false);
+ VFI.push_back(FI);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+ .addFrameIndex(FI).addImm(0);
+ }
+
+ // Build the prologue SWI for R17, R18
+ int R17FI = MFI->CreateStackObject(4,4,false,false);
+ int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ // Build the prologue SWI and the epilogue LWI for RMSR if needed
+ if (isIntr) {
+ int MSRFI = MFI->CreateStackObject(4,4,false,false);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+ .addReg(MBlaze::RMSR);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+ .addReg(MBlaze::R11);
+ }
+
+ // Build the epilogue LWI for R17, R18
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ // Build the epilogue LWI for R3 - R12 if needed
+ for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+ if (!MRI.isPhysRegUsed(r)) continue;
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+ .addFrameIndex(VFI[--i]).addImm(0);
+ }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // Replace the dummy '0' SPOffset by the negative offsets, as explained in
+ // LowerFORMAL_ARGUMENTS. Leaving '0' in place until now is necessary to
+ // keep calculateFrameObjectOffsets from laying these objects out itself.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+ DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ // unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // Make sure the frame is aligned.
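+ // (For example, with the default 4-byte alignment a 13-byte frame is
+ // rounded up to 16: (13 + 3) & ~3 == 16.)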
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ MFI->setStackSize(FrameSize);
+ DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+ const {
+ const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->hasReplacement(FI))
+ FI = MBlazeFI->getReplacement(FI);
+ return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // swi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addReg(MBlaze::R1).addImm(FPOffset);
+ }
+
+ // lwi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ if (MFI->adjustsStack() || requiresRA) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->CreateFixedObject(4,0,true);
+ }
+
+ if (hasFP(MF)) {
+ MBlazeFI->setFPStackOffset(4);
+ MFI->CreateFixedObject(4,4,true);
+ }
+
+ interruptFrameLayout(MF);
+ analyzeFrameIndexes(MF);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 000000000000..8be15bfb857d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,53 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze-specific subclass of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+ const MBlazeSubtarget &STI;
+
+public:
+ explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 000000000000..6b4349766f37
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,277 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddrRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+ N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
+ return false; // jump tables.
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
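+/// For illustration (operands hypothetical): an address of the form
+/// (add r5, 16) yields Base = r5 and Disp = 16, while a bare FrameIndex
+/// falls through to the final [r+0] case with Disp = 0 and Base rewritten
+/// to the corresponding TargetFrameIndex.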
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(N, Base, Disp))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// Select - Select instructions not handled by custom lowering. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // If we have a custom node, it has already been selected.
+ if (Node->isMachineOpcode())
+ return NULL;
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch (Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDIK;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
+
+
+ /// Handle direct and indirect calls when using PIC. On PIC, when
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise GA's target must
+ /// be loaded with 3 instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ((isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)))
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // The call target must be placed in R20
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Glue, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 000000000000..62dfdcc2fd10
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,1147 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+ // MBlaze does not have an i1 type, so use i32 for the results of
+ // setcc operations (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+ // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Sign extended loads must be expanded
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with an AND/OR on the result;
+ // we don't want that, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Variable Argument support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
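+ // Functions are aligned on 4-byte boundaries (the argument here is the
+ // log2 of the alignment in bytes).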
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+MachineBasicBlock*
+MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB)
+ const {
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL:
+ return EmitCustomShift(MI, MBB);
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC:
+ return EmitCustomSelect(MI, MBB);
+
+ case MBlaze::CAS32:
+ case MBlaze::SWP32:
+ case MBlaze::LAA32:
+ case MBlaze::LAS32:
+ case MBlaze::LAD32:
+ case MBlaze::LAO32:
+ case MBlaze::LAX32:
+ case MBlaze::LAN32:
+ return EmitCustomAtomic(MI, MBB);
+
+ case MBlaze::MEMBARRIER:
+ // The Microblaze does not need memory barriers. Just delete the pseudo
+ // instruction and finish.
+ MI->eraseFromParent();
+ return MBB;
+ }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring all successors and the
+ // remaining instructions from the current block to the new block,
+ // which will contain the PHI node for the result.
+ finish->splice(finish->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the true and fallthrough blocks as its successors.
+ MBB->addSuccessor(loop);
+ MBB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(MBB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable("Cannot lower unknown shift instruction");
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
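+ // Map the condition code held in operand 4 onto the matching conditional
+ // branch opcode.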
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable("Unknown branch condition");
+ case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+ }
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Transfer the remainder of MBB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ MBB->addSuccessor(flsBB);
+ MBB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ BuildMI(MBB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // All atomic instructions on the Microblaze are implemented using the
+ // load-linked / store-conditional style atomic instruction sequences.
+ // Thus, all operations will look something like the following:
+ //
+ // start:
+ // lwx RV, RP, 0
+ // <do stuff>
+ // swx RV, RP, 0
+ // addic RC, R0, 0
+ // bneid RC, start
+ //
+ // exit:
+ //
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+
+ // Create the start and exit basic blocks for the atomic operation
+ MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, start);
+ F->insert(It, exit);
+
+ // Update machine-CFG edges by transferring all successors and the
+ // remaining instructions from the current block to the new block,
+ // which will contain the PHI node for the result.
+ exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the start block as a successor of the current block.
+ MBB->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ MachineBasicBlock *final = start;
+ unsigned finalReg = 0;
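+ // 'final' is the block that ends with the store-conditional; for CAS32 a
+ // separate block is created so the compare can branch around the store.
+ // 'finalReg' holds the value that will be written back by SWX.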
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+ case MBlaze::SWP32:
+ finalReg = MI->getOperand(2).getReg();
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+ break;
+
+ case MBlaze::LAN32:
+ case MBlaze::LAX32:
+ case MBlaze::LAO32:
+ case MBlaze::LAD32:
+ case MBlaze::LAS32:
+ case MBlaze::LAA32: {
+ unsigned opcode = 0;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic load!");
+ case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+ case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+ case MBlaze::LAD32: opcode = MBlaze::AND; break;
+ case MBlaze::LAO32: opcode = MBlaze::OR; break;
+ case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+ case MBlaze::LAN32: opcode = MBlaze::AND; break;
+ }
+
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(opcode), finalReg)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ if (MI->getOpcode() == MBlaze::LAN32) {
+ unsigned tmp = finalReg;
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+ .addReg(tmp)
+ .addImm(-1);
+ }
+ break;
+ }
+
+ case MBlaze::CAS32: {
+ finalReg = MI->getOperand(3).getReg();
+ final = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, final);
+ start->addSuccessor(exit);
+ start->addSuccessor(final);
+ final->addSuccessor(exit);
+ final->addSuccessor(start);
+
+ unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ BuildMI(start, dl, TII->get(MBlaze::BNEID))
+ .addReg(CMP)
+ .addMBB(exit);
+
+ final->moveAfter(start);
+ exit->moveAfter(final);
+ break;
+ }
+ }
+
+ unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(final, dl, TII->get(MBlaze::SWX))
+ .addReg(finalReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+ .addReg(MBlaze::R0)
+ .addImm(0);
+
+ BuildMI(final, dl, TII->get(MBlaze::BNEID))
+ .addReg(CHK)
+ .addMBB(start);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return exit;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable("Cannot lower select_cc with unknown type");
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+ return SDValue(); // Not reached
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), 0);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ MBlaze::R5, MBlaze::R6, MBlaze::R7,
+ MBlaze::R8, MBlaze::R9, MBlaze::R10
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+ if (!Reg) return false;
+
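+ // In addition to the register, reserve stack space of the same size for
+ // this argument; the convention used here keeps a stack slot for every
+ // register argument.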
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - function arguments are copied from virtual regs to
+/// physical regs or the stack frame; CALLSEQ_START and CALLSEQ_END are
+/// emitted. TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MBlaze does not yet support tail call optimization
+ isTailCall = false;
+
+ // The MBlaze requires stack slots for arguments passed to var arg
+ // functions even if they are passed in registers.
+ bool needsRegArgSlots = isVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Variable argument function calls require a minimum of 24 bytes of stack
+ if (isVarArg && NumBytes < 24) NumBytes = 24;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Register can't get to this point...
+ assert(VA.isMemLoc());
+
+ // Since we are already passing values on the stack we don't
+ // need to worry about creating additional slots for the
+ // values passed via registers.
+ needsRegArgSlots = false;
+
+ // Create the frame index object for this incoming parameter
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // Emit an ISD::STORE that stores the
+ // parameter value to a stack location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // If we need to reserve stack space for the arguments passed via registers
+ // then create a fixed stack object at the beginning of the stack.
+ if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+ MFI->CreateFixedObject(28,0,true);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, 0);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), 0);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
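+ // The glue result of CALLSEQ_END is only needed if there are return
+ // values to copy out of physical registers.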
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+ MBlazeFI->setVarArgsFrameIndex(0);
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Keep track of the last register used for arguments
+ unsigned ArgRegEnd = 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+ SDValue StackPtr;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ ArgRegEnd = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = MBlaze::GPRRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = MBlaze::GPRRegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size. If it is a floating point value
+ // then convert to the correct type.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // VA.isMemLoc()
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The last argument is not a register
+ ArgRegEnd = 0;
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+ // size gets known (on EliminateFrameIndex). A dummy SPOffset is
+ // used instead of a direct negative address (which is recorded to
+ // be used on emitPrologue) to avoid miscalculating the first stack
+ // offset in PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
+ MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+ MBlazeFI->recordLiveIn(FI);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ // To meet the ABI, when varargs are passed in registers, the registers
+ // must have their values written to the caller's stack frame. If the last
+ // argument was placed on the stack, there's no need to save any register.
+ if ((isVarArg) && ArgRegEnd) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The last register argument that must be saved is MBlaze::R10
+ TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+
+ unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
+ unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
+ unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+ unsigned StackLoc = Start - Begin + 1;
+
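+ // Copy each remaining argument register (ArgRegEnd+1 .. R10) into its own
+ // fixed stack slot; StackLoc is the slot index in words.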
+ for (; Start <= End; ++Start, ++StackLoc) {
+ unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!MBlazeFI->getVarArgsFrameIndex())
+ MBlazeFI->setVarArgsFrameIndex(FI);
+ }
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens on vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represents the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // MBlaze-specific constraints
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ // TODO: These can't possibly be right, but match what was in
+ // getRegClassForInlineAsmConstraint.
+ case 'd':
+ case 'y':
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 000000000000..bb128da3c7c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,185 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GT;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ public:
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 000000000000..4acdcfdd772c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,219 @@
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+let rb=0 in {
+ class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>;
+}
+
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x3A, "lwi ", load>;
+
+ def SWF : StoreFM<0x36, "sw ", store>;
+ def SWFI : StoreFMI<0x3E, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
+}
+
+
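+// Select_FCC is expanded by EmitCustomSelect (MBlazeISelLowering.cpp) into a
+// diamond of basic blocks, just like Select_CC.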
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+ def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+ def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
+}
+
+// SET_CC operations
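+// Each floating point setcc is matched to a Select_CC that picks 1 or 0
+// (materialized with ADDIK from R0) based on the corresponding fcmp result.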
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (XOR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_NE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 2)>;
+}
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+ (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 000000000000..3082a7e227f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,229 @@
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
+{
+ bits<5> rd;
+ bits<4> fslno;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x0;
+ let Inst{17-21} = flags; // NCTAE
+ let Inst{22-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
+{
+ bits<5> rd;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x0;
+ let Inst{22-26} = flags; // NCTAE
+ let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIC_FSLp>
+{
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode GPR:$b)], IIC_FSLp>
+{
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
+def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>;
+def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>;
+ def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>;
+ def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>;
+ def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>;
+ def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>;
+ def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>;
+ def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>;
+ def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>;
+ def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>;
+ def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>;
+ def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>;
+ def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>;
+ def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>;
+ def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>;
+ def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>;
+ def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
+def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x13, 0x0B, "ecagetd ", int_mblaze_fsl_ecaget>;
+def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>;
+ def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>;
+ def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>;
+ def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>;
+ def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>;
+ def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>;
+ def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>;
+ def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+ def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>;
+ def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>;
+ def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>;
+ def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+ def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>;
+ def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+ def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+ def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>;
+def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>;
+ def NAPUT : FSLPut<0x1B, 0x9, "naput ", int_mblaze_fsl_naput>;
+ def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>;
+ def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>;
+ def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>;
+ def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>;
+ def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>;
+ def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>;
+}
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>;
+def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>;
+ def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>;
+ def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>;
+ def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>;
+ def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>;
+ def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>;
+ def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>;
+ def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 000000000000..54f605f989a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,204 @@
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution our machine code emitter uses to emit machine instruction
+// encodings.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR : Format<1>; // ADD, OR, etc.
+def FRRI : Format<2>; // ADDI, ORI, etc.
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic MBlaze Format
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
+ let Namespace = "MBlaze";
+ field bits<32> Inst;
+
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+ // assembler and disassembler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ TA<op, flags, outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rrd;
+ bits<5> rrb;
+ bits<5> rra;
+
+ let Form = FRRRR;
+
+ let rd = rrd;
+ let ra = rra;
+ let rb = rrb;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ TB<op, outs, ins, asmstr, pattern, itin> {
+ bits<5> rrd;
+ bits<16> rimm16;
+ bits<5> rra;
+
+ let Form = FRIR;
+
+ let rd = rrd;
+ let ra = rra;
+ let imm16 = rimm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> imm5;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0x0;
+ let Inst{21-22} = flags;
+ let Inst{23-26} = 0x0;
+ let Inst{27-31} = imm5;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<14> imm14;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-17} = flags;
+ let Inst{18-31} = imm14;
+}
+
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<15> imm15;
+
+ let Inst{6-10} = rd;
+ let Inst{11-16} = flags;
+ let Inst{17-31} = imm15;
+}
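Note: MBlazeInst stores the instruction format in TSFlags{5-0}, and the matching MBlazeII::FormMask (value 63) appears in MBlazeInstrInfo.h later in this diff. A minimal sketch, not part of the diff, of how a client would recover the format:

#include <cstdint>

// Minimal sketch: recover the instruction format from TSFlags, mirroring
// "let TSFlags{5-0} = FormBits" above; 0x3F corresponds to MBlazeII::FormMask
// from MBlazeInstrInfo.h further down in this diff.
static unsigned getInstFormat(uint64_t TSFlags) {
  return unsigned(TSFlags & 0x3F);   // yields FPseudo, FRRR, FRRI, ...
}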
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 000000000000..188f10a3972e
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,296 @@
+//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target
+/// nop instruction.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
+ .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (MBlaze::isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with something like BEQID then BRID, handle it.
+ if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it.
+ // The second one is not executed, so remove it.
+ if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "MBlaze branch conditions have two components!");
+
+ unsigned Opc = MBlaze::BRID;
+ if (!Cond.empty())
+ Opc = (unsigned)Cond[0].getImm();
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+
+ if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+ !MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool MBlazeInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+ switch (Cond[0].getImm()) {
+ default: return true;
+ case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false;
+ case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false;
+ case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false;
+ case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false;
+ case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false;
+ case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false;
+ case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false;
+ case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false;
+ case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false;
+ case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false;
+ case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false;
+ case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false;
+ case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false;
+ case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false;
+ case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false;
+ case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false;
+ case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false;
+ case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false;
+ case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+ case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+ case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+ case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+ case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+ case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+ }
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
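Note: the branch hooks above follow the usual AnalyzeBranch/RemoveBranch/InsertBranch contract. A hedged usage sketch, not taken from this diff (the helper name retargetBlock is invented, and the usual llvm/CodeGen includes are assumed), showing how a pass could rewrite a block's terminators with exactly the signatures defined above:

// Hedged usage sketch (helper name is hypothetical): rewrite MBB so it ends
// with a single unconditional branch to NewDest, using the hooks above.
static void retargetBlock(const MBlazeInstrInfo &TII, MachineBasicBlock &MBB,
                          MachineBasicBlock *NewDest) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 2> Cond;
  if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ false))
    return;                      // terminator sequence was not understood
  TII.RemoveBranch(MBB);         // drop the old terminators
  Cond.clear();                  // empty Cond => unconditional BRID
  TII.InsertBranch(MBB, NewDest, 0, Cond, DebugLoc());
}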
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 000000000000..79f962b349bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,296 @@
+//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MBlazeRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the
+ // ones above, but are used with a branch False.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_EQ,
+ COND_GT,
+ COND_GE,
+ COND_LT,
+ COND_LE,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ inline static unsigned GetCondBranchFromCond(CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case COND_EQ: return MBlaze::BEQID;
+ case COND_NE: return MBlaze::BNEID;
+ case COND_GT: return MBlaze::BGTID;
+ case COND_GE: return MBlaze::BGEID;
+ case COND_LT: return MBlaze::BLTID;
+ case COND_LE: return MBlaze::BLEID;
+ }
+ }
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_EQ into COND_NE.
+ // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+ /// MBlazeFCCToString - Map each FP condition code to its mnemonic string.
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+
+ inline static bool isUncondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BRI:
+ case MBlaze::BRAI:
+ case MBlaze::BRID:
+ case MBlaze::BRAID:
+ return true;
+ }
+ }
+
+ inline static bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BEQI: case MBlaze::BEQID:
+ case MBlaze::BNEI: case MBlaze::BNEID:
+ case MBlaze::BGTI: case MBlaze::BGTID:
+ case MBlaze::BGEI: case MBlaze::BGEID:
+ case MBlaze::BLTI: case MBlaze::BLTID:
+ case MBlaze::BLEI: case MBlaze::BLEID:
+ return true;
+ }
+ }
+}
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stack slot stored to. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Insert a nop instruction when a hazard condition is found.
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
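Note: AnalyzeBranch in the .cpp above encodes a condition as two operands, the branch opcode as an immediate followed by the tested register, and ReverseBranchCondition rewrites the opcode in place. A small hedged sketch of that convention (the register choice R5 and the helper name are purely illustrative, and the usual llvm/CodeGen includes are assumed):

// Hedged sketch: build the two-operand condition that AnalyzeBranch produces
// and InsertBranch/ReverseBranchCondition consume (register R5 is arbitrary).
static void buildExampleCond(SmallVectorImpl<MachineOperand> &Cond) {
  Cond.push_back(MachineOperand::CreateImm(MBlaze::BEQID)); // Cond[0]: opcode
  Cond.push_back(MachineOperand::CreateReg(MBlaze::R5,      // Cond[1]: register
                                           /*isDef=*/ false));
  // ReverseBranchCondition(Cond) would rewrite Cond[0] to MBlaze::BNEID.
}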
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 000000000000..950f2d77029b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,884 @@
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze type profiles
+//===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
+
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+ SDNPVariadic]>;
+
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+// def HasFSL : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+// def HasESR : Predicate<"Subtarget.hasESR()">;
+// def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+// def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+def MBlazeMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+ let Name = "Fsl";
+ let SuperClasses = [];
+}
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def uimm15 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+ let ParserMatchClass = MBlazeFslAsmOperand;
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, simm16);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR, GPR);
+ let ParserMatchClass = MBlazeMemAsmOperand;
+}
+
+// Node immediate fits as a 16-bit sign-extended target immediate.
+def immSExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// Node immediate fits as a 16-bit zero-extended target immediate; only the
+// lower 16 bits of the node immediate are used.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
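Note: the zero-extension predicates above all reduce to the same masking check on the node's zero-extended value. A plain-integer restatement, purely for illustration:

#include <cstdint>

// Plain-integer restatement of the PatLeaf checks above (illustrative only):
// immZExt4 accepts 0..15, immZExt5 accepts 0..31, immZExt16 accepts 0..65535.
static bool fitsZeroExtended(uint64_t V, unsigned Bits) {
  return V == (V & ((1ULL << Bits) - 1));
}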
+
+// MBlaze Address Mode. An SDNode frameindex could possibly be a match
+// since stack load and store instructions use it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack adjustment is always done with addik, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "#ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+ IIC_ALU>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMl>;
+}
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
+
+let mayStore = 1 in {
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [], IIC_MEMs>;
+}
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TB<op, (outs), (ins GPR:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0x0;
+ let ra = br;
+ let Form = FCCR;
+}
+
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIC_BR> {
+ let rd = 0;
+ let ra = br;
+ let Form = FCCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCR;
+}
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIC_BRl> {
+ let ra = br;
+ let Form = FRCI;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs),
+ (ins GPR:$a, GPR:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRR;
+}
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIC_BRc> {
+ let rd = br;
+ let Form = FCRI;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+
+ let Predicates=[HasPatCmp] in {
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+ }
+
+ let Defs = [CARRY] in {
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>;
+ }
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>;
+
+ let Defs = [CARRY] in {
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>;
+
+ let Uses = [CARRY] in {
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
+ }
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>;
+ def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>;
+ def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+
+ let Defs = [CARRY] in {
+ def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>;
+
+ let Uses = [CARRY] in {
+ def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>;
+ def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+ def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+ }
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
+
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
+
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
+
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
+
+ def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x39, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x3A, "lwi ", load>;
+}
+
+def SB : StoreM<0x34, 0x000, "sb ">;
+def SBR : StoreM<0x34, 0x200, "sbr ">;
+
+def SH : StoreM<0x35, 0x000, "sh ">;
+def SHR : StoreM<0x35, 0x200, "shr ">;
+
+def SW : StoreM<0x36, 0x000, "sw ">;
+def SWR : StoreM<0x36, 0x200, "swr ">;
+
+let Defs = [CARRY] in {
+ def SWX : StoreM<0x36, 0x400, "swx ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ">;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ">;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ">;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ">;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ">;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ">;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ">;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ">;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ">;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ">;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ">;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ">;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ">;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ">;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ">;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x10, Form=FCRI in {
+ def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtsd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x11, Form=FCRI in {
+ def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtid $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x12, Form=FCRI in {
+ def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtbd $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x14, Form=FCRI in {
+ def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rted $target, $imm",
+ [],
+ IIC_BR>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select_CC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+ "sext16 $dst, $src", [], IIC_ALU>;
+ def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+ "sext8 $dst, $src", [], IIC_ALU>;
+ let Defs = [CARRY] in {
+ def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+ "srl $dst, $src", [], IIC_ALU>;
+ def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+ "sra $dst, $src", [], IIC_ALU>;
+ let Uses = [CARRY] in {
+ def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+ "src $dst, $src", [], IIC_ALU>;
+ }
+ }
+}
+
+let isCodeGenOnly=1 in {
+ def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>;
+ def ORI32 : LogicI32<0x28, "ori ">;
+ def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+ def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+ "mfs $dst, $src", [], IIC_ALU>;
+}
+
+let Form=FCRCS in {
+ def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+ "mts $dst, $src", [], IIC_ALU>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+ "msrset $dst, $set", [], IIC_ALU>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+ "msrclr $dst, $clr", [], IIC_ALU>;
+
+let rd=0x0, Form=FCRR in {
+ def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+ "wdc $a, $b", [], IIC_WDC>;
+ def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.flush $a, $b", [], IIC_WDC>;
+ def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.clear $a, $b", [], IIC_WDC>;
+ def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+ "wic $a, $b", [], IIC_WDC>;
+}
+
+def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+ "imm $imm", [], IIC_ALU>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+ def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+ "# atomic compare and swap",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+ def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+ "# atomic swap",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+ def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and add",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and sub",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and and",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and or",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and xor",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and nand",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+ def MEMBARRIER : MBlazePseudo<(outs), (ins),
+ "# memory barrier",
+ [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
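+// These immediate patterns work because r0 always reads as zero, so adding or
+// or-ing a value into r0 simply materializes that constant. Values wider than
+// 16 bits presumably rely on the IMM prefix added at encoding time (see the
+// IMM instruction above and the MC code emitter).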
+
+// In register sign extension
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+ (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+ (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+ (BRALD (i32 R15), GPR:$dst)>;
+
+// Shift Instructions
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
+
+// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 6)>;
+
+// SELECT operations
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+ (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
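+// A plain select on a boolean is thus lowered as "condition != 0" (CC code 2).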
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
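+// rtsd returns to $target + 8, skipping the branch-and-link instruction and
+// its delay slot; rtid uses an offset of 0 for interrupt returns.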
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+ (BNEID GPR:$C, bb:$T)>;
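+// A bare i32 condition falls back to a branch-if-not-zero on the value itself.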
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
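+// r0 reads as zero, so ORI against r0 materializes the lowered symbol address.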
+
+// Misc instructions
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
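+// Storing a constant zero reuses r0 directly instead of materializing 0 first.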
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 000000000000..32d67b264a20
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,113 @@
+//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return "";
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 000000000000..9804c7723bee
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 000000000000..278afbefc165
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,131 @@
+//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 000000000000..ddc636d0ce64
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,222 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+ MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+
+public:
+ MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii) {
+ }
+
+ ~MBlazeMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+ unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+ // FIXME: getMBlazeRegisterNumbering() is sufficient?
+ assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
+ return 0;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ // The MicroBlaze uses a bit reversed format so we need to reverse the
+ // order of the bits. Taken from:
+ // http://graphics.stanford.edu/~seander/bithacks.html
+ C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
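+ // (The multiply/mask sequence above reverses the eight bits of C.)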
+
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+ void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO) const {
+ if (MO.isReg())
+ return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
+ }
+ return 0;
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+ int32_t val = (int32_t)imm.getImm();
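+ // Immediates outside the signed 16-bit range need an IMM prefix; the two
+ // raw bytes written below carry the upper halfword of the value, and the
+ // instruction that follows is expected to encode the lower halfword.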
+ if (val > 32767 || val < -32768) {
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+ EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
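+ // The 32-bit codegen-only forms always reserve space for an IMM prefix
+ // here; the real upper halfword is presumably patched in via the 4-byte
+ // fixups recorded in EmitImmediate.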
+ switch (MI.getOpcode()) {
+ default: break;
+
+ case MBlaze::ADDIK32:
+ case MBlaze::ORI32:
+ case MBlaze::BRLID32:
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getNumOperands()>opNo && "Not enough operands for instruction");
+
+ MCOperand oper = MI.getOperand(opNo);
+
+ if (oper.isImm()) {
+ EmitIMM(oper, CurByte, OS);
+ } else if (oper.isExpr()) {
+ MCFixupKind FixupKind;
+ switch (MI.getOpcode()) {
+ default:
+ FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ case MBlaze::ORI32:
+ case MBlaze::ADDIK32:
+ case MBlaze::BRLID32:
+ FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ }
+ }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Emit an IMM instruction if the instruction we are encoding requires it
+ EmitIMM(MI,CurByte,OS);
+
+ switch ((TSFlags & MBlazeII::FormMask)) {
+ default: break;
+ case MBlazeII::FPseudo:
+ // Pseudo instructions don't get encoded.
+ return;
+ case MBlazeII::FRRI:
+ EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRIR:
+ EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCRI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRCI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCCI:
+ EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+ break;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted
+ unsigned Value = getBinaryCodeForInstr(MI);
+ EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 000000000000..a7e400b1d1a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ bool ignored;
+ APFloat FVal = MO.getFPImm()->getValueAPF();
+ FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+ APInt IVal = FVal.bitcastToAPInt();
+ uint64_t Val = *IVal.getRawData();
+ MCOp = MCOperand::CreateImm(Val);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 000000000000..92196f220225
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MBlazeMCInstLower - This class is used to lower a MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 000000000000..df395094282f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,170 @@
+//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private MBlaze target-specific information for each
+/// MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds for each function where on the stack the Frame Pointer must be
+ /// saved. This is used on Prologue and Epilogue to emit FP save/restore
+ int FPStackOffset;
+
+ /// Holds for each function where on the stack the Return Address must be
+ /// saved. This is used on Prologue and Epilogue to emit RA save/restore
+ int RAStackOffset;
+
+ /// MBlazeFIHolder - Holds a FrameIndex and its Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+ /// In LowerFormalArguments the stack size is unknown, so the Stack Pointer
+ /// Offset calculation for arguments not passed in registers must be
+ /// postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // When the function takes VarArgs, argument registers must be written back
+ // to the caller's stack, preserving the arguments already passed in
+ // registers. Since the stack size is unknown in LowerFormalArguments, the
+ // Stack Pointer Offset calculation must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ // When determining the final stack layout some of the frame indexes may
+ // be replaced by new frame indexes that reside in the caller's stack
+ // frame. The replacements are recorded in this structure.
+ DenseMap<int,int> FIReplacements;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// LiveInFI - keeps track of the frame indexes in a caller's stack
+ /// frame that are live into a function.
+ SmallVector<int, 16> LiveInFI;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), LiveInFI()
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLiveIn(int FI) {
+ LiveInFI.push_back(FI);
+ }
+
+ bool isLiveIn(int FI) {
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+ if (FI == LiveInFI[i]) return true;
+
+ return false;
+ }
+
+ const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+ void recordReplacement(int OFI, int NFI) {
+ FIReplacements.insert(std::make_pair(OFI,NFI));
+ }
+
+ bool hasReplacement(int OFI) const {
+ return FIReplacements.find(OFI) != FIReplacements.end();
+ }
+
+ int getReplacement(int OFI) const {
+ return FIReplacements.lookup(OFI);
+ }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
+ }
+
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 000000000000..f0b201a66170
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,363 @@
+//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-info"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getRegisterFromNumbering - Given a register number (e.g. 0), return the
+/// enum value that it corresponds to (e.g. MBlaze::R0).
+unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const unsigned* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const unsigned CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
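+ // r0 is the constant zero register, r1 the stack pointer, r15 the return
+ // address and r19 the frame pointer (see getFrameRegister); the remaining
+ // registers here are assumed to be reserved for dedicated ABI uses.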
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+// This function eliminates ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
+ // 'addi r1, r1, <amt>'
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// A FrameIndex represents an object inside an abstract stack. We must
+// replace each FrameIndex with a direct stack/frame pointer reference.
+void MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned oi = i == 2 ? 1 : 2;
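+ // The frame-index operand is paired with an immediate offset operand; oi
+ // selects whichever of the two slots holds that immediate.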
+
+ DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ dbgs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MFI->getStackSize();
+ int spOffset = MFI->getObjectOffset(FrameIndex);
+
+ DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ dbgs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n"
+ << "isFixed : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+ << "isLiveIn : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+ << "isSpill : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+ << "\n" );
+
+ // As explained in LowerFormalArguments, detect negative offsets and
+ // adjust SPOffsets taking the final stack size into account.
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+ Offset += MI.getOperand(oi).getImm();
+
+ DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(oi).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getRARegister() const {
+ return MBlaze::R15;
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
+}
+
+int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 000000000000..7ebce21d3a80
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,85 @@
+//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// MBlaze::R15, return the number that it corresponds to (e.g. 15).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+ static unsigned getRegisterFromNumbering(unsigned RegEnum);
+ static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
+ static bool isRegister(unsigned RegEnum);
+ static bool isSpecialRegister(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 000000000000..13c46ba1ecba
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,148 @@
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// Special purpose registers are identified by 15-bit register numbers
+class MBlazeSReg<string n> : Register<n> {
+ field bits<15> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+ // Special Purpose Registers
+ def RPC : MBlazeSPRReg<0x0000, "rpc">, DwarfRegNum<[32]>;
+ def RMSR : MBlazeSPRReg<0x0001, "rmsr">, DwarfRegNum<[33]>;
+ def REAR : MBlazeSPRReg<0x0003, "rear">, DwarfRegNum<[34]>;
+ def RESR : MBlazeSPRReg<0x0005, "resr">, DwarfRegNum<[35]>;
+ def RFSR : MBlazeSPRReg<0x0007, "rfsr">, DwarfRegNum<[36]>;
+ def RBTR : MBlazeSPRReg<0x000B, "rbtr">, DwarfRegNum<[37]>;
+ def REDR : MBlazeSPRReg<0x000D, "redr">, DwarfRegNum<[38]>;
+ def RPID : MBlazeSPRReg<0x1000, "rpid">, DwarfRegNum<[39]>;
+ def RZPR : MBlazeSPRReg<0x1001, "rzpr">, DwarfRegNum<[40]>;
+ def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
+ def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+ def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+ def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
+
+ // The carry bit. On the MicroBlaze this is really bit 29 of the
+ // MSR register, but it is the only bit of that register that we
+ // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
+
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
+ // Reserved
+ RPC,
+ RMSR,
+ REAR,
+ RESR,
+ RFSR,
+ RBTR,
+ REDR,
+ RPID,
+ RZPR,
+ RTLBX,
+ RTLBLO,
+ RTLBHI,
+ RPVR0,
+ RPVR1,
+ RPVR2,
+ RPVR3,
+ RPVR4,
+ RPVR5,
+ RPVR6,
+ RPVR7,
+ RPVR8,
+ RPVR9,
+ RPVR10,
+ RPVR11
+ )>
+{
+ // None of the special purpose registers are allocatable.
+ let isAllocatable = 0;
+}
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 000000000000..c298eda2195f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace MBlaze {
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 000000000000..4662f25ceb12
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,55 @@
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze functional units.
+//===----------------------------------------------------------------------===//
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIC_ALU : InstrItinClass;
+def IIC_ALUm : InstrItinClass;
+def IIC_ALUd : InstrItinClass;
+def IIC_SHT : InstrItinClass;
+def IIC_FSLg : InstrItinClass;
+def IIC_FSLp : InstrItinClass;
+def IIC_MEMs : InstrItinClass;
+def IIC_MEMl : InstrItinClass;
+def IIC_FPU : InstrItinClass;
+def IIC_FPUd : InstrItinClass;
+def IIC_FPUf : InstrItinClass;
+def IIC_FPUi : InstrItinClass;
+def IIC_FPUs : InstrItinClass;
+def IIC_FPUc : InstrItinClass;
+def IIC_BR : InstrItinClass;
+def IIC_BRc : InstrItinClass;
+def IIC_BRl : InstrItinClass;
+def IIC_WDC : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 000000000000..ccbf99dbd3a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+ [IF,ID,EX], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes three cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes 34 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the execute stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<34,[EX]>], // 34 cycles in execute stage
+ [ 35 // result ready after 35 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The two source operands
+ // are read during the decode stage and the result is ready after the execute
+ // stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the execute stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the execute stage, which takes six cycles. The
+ // source operands are read during the decode stage and the results are ready
+ // after the execute stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 30
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<30,[EX]>], // 30 cycles in execute stage
+ [ 31 // result ready after 31 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes seven cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<7,[EX]>], // seven cycles in execute stage
+ [ 8 // result ready after eight cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes six cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 29
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<29,[EX]>], // 29 cycles in execute stage
+ [ 30 // result ready after 30 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages except the execute stage, which takes three
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages except the execute stage, which takes two cycles.
+ // The one source operand is read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. The two source operands are read during the decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. All of the source operands are read during the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes two cycles. All of
+ // the source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
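
The latency lists in these itineraries pair a result-ready cycle with one read cycle per source operand. As a rough, standalone illustration of how such numbers are consumed, the sketch below computes the stall a dependent instruction sees after a three-stage IIC_ALUm multiply (result ready after four cycles, operands read after one cycle, as written above); the helper is purely illustrative and not an LLVM API.

#include <algorithm>
#include <cstdio>

// Given the producer's result-ready cycle and the consumer's operand-read
// cycle (both relative to issue), return how many cycles the consumer must
// wait if it would otherwise issue in the very next cycle.
static int stallCycles(int defCycle, int useCycle) {
  return std::max(defCycle - useCycle, 0);
}

int main() {
  // IIC_ALUm in MBlazePipe3Itineraries: result at cycle 4, operand read at cycle 1.
  std::printf("mul -> dependent add stall: %d cycles\n", stallCycles(4, 1)); // prints 3
  return 0;
}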
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 000000000000..fa88766fdb18
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+ [IF,ID,EX,MA,WB], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. The two source operands are read during the decode stage
+ // and the result is ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the memory access stage, which takes 31 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the memory access stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<31,[MA]> // 31 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 33 // result ready after 33 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages.
+ // The two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the writeback stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the memory access stage, which takes two
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 26
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<26,[MA]> // 26 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 29 // result ready after 29 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes three cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<3,[MA]> // three cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 6 // result ready after six cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes two cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 25
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<25,[MA]> // 25 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 28 // result ready after 28 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages. The source operands are read during the
+ // decode stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages. The one source operand is read during the decode
+ // stage and the result is ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. The two source operands are read during the
+ // decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. All of the source operands are read during
+ // the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. All of the source operands are read during the decode
+ // stage and the result is ready after the writeback stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 // second operand read after one cycle
+ , 1 ]> // third operand read after one cycle
+]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6a115b22bd4a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-selectiondag-info"
+#include "MBlazeTargetMachine.h"
+using namespace llvm;
+
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
new file mode 100644
index 000000000000..9f8e2aad4b20
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESELECTIONDAGINFO_H
+#define MBLAZESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MBlazeTargetMachine;
+
+class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
+ ~MBlazeSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 000000000000..eda141daf2b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,63 @@
+//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ MBlazeGenSubtargetInfo(TT, CPU, FS),
+ HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+ HasFPU(false), HasMul64(false), HasSqrt(false)
+{
+ // Parse features string.
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mblaze";
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Only use instruction scheduling if the selected CPU has an instruction
+ // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPUName != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Compute the issue width of the MBlaze itineraries
+ computeIssueWidth();
+}
+
+void MBlazeSubtarget::computeIssueWidth() {
+ InstrItins.IssueWidth = 1;
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+ return HasItin && OptLevel >= CodeGenOpt::Default;
+}
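
The return expression above is a simple conjunction: the selected CPU must have an itinerary (anything other than the generic "mblaze" CPU) and the optimization level must be at least the default. A throwaway sketch of that decision with a stand-in enum (illustrative only, not LLVM's CodeGenOpt definition):

#include <cstdio>

// Stand-in for CodeGenOpt::Level; the ordering None < Less < Default <
// Aggressive is all this sketch relies on.
enum class OptLevel { None = 0, Less = 1, Default = 2, Aggressive = 3 };

// Mirrors the return expression of MBlazeSubtarget::enablePostRAScheduler.
static bool enablePostRASched(bool hasItin, OptLevel opt) {
  return hasItin && opt >= OptLevel::Default;
}

int main() {
  std::printf("%d\n", (int)enablePostRASched(true,  OptLevel::Aggressive)); // 1
  std::printf("%d\n", (int)enablePostRASched(true,  OptLevel::Less));       // 0
  std::printf("%d\n", (int)enablePostRASched(false, OptLevel::Default));    // 0 (generic CPU)
  return 0;
}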
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 000000000000..43b0197ad5aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,75 @@
+//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
+
+protected:
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasItin;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// Compute the maximum number of issues per cycle for the
+ /// MBlaze scheduling itineraries.
+ void computeIssueWidth();
+
+ /// enablePostRAScheduler - True at the default optimization level or above
+ /// when the selected CPU has an instruction itinerary.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItineraryData - Return the instruction itineraries for the subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ bool hasItin() const { return HasItin; }
+ bool hasPCMP() const { return HasPatCmp; }
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 000000000000..7208874aef1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,102 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+}
+
+// DataLayout: big-endian, 32-bit pointer/ABI/alignment.
+// The stack is always 8-byte aligned.
+// In the function prologue the stack is created by decrementing the stack
+// pointer. Once decremented, all references use positive offsets from the
+// stack/frame pointer, so using StackGrowsUp makes the handling easier.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS):
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+ if (getRelocationModel() == Reloc::Default) {
+ setRelocationModel(Reloc::Static);
+ }
+
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeDelaySlotFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 000000000000..cd6caafbf309
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,84 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "MBlazeFrameLowering.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ MBlazeFrameLowering FrameLowering;
+ MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ MBlazeELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
+
+ public:
+ MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const
+ { return &InstrItins; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual const MBlazeTargetLowering *getTargetLowering() const
+ { return &TLInfo; }
+
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
+ virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 000000000000..abd1b0b62c7d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,90 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
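
The classification above ultimately hinges on the 8-byte threshold in IsInSmallSection. A standalone sketch that restates that predicate with a few illustrative sizes (the helper is a copy for demonstration, not part of the MBlaze sources):

#include <cstdint>
#include <cstdio>

// Restates IsInSmallSection: only non-empty objects of at most 8 bytes are
// placed in .sdata/.sbss and addressed gp-relative.
static bool isInSmallSection(uint64_t Size) {
  return Size > 0 && Size <= 8;
}

int main() {
  std::printf("4-byte int:     %d\n", (int)isInSmallSection(4));  // 1 -> small section
  std::printf("8-byte double:  %d\n", (int)isInSmallSection(8));  // 1 -> small section
  std::printf("16-byte struct: %d\n", (int)isInSmallSection(16)); // 0 -> regular section
  return 0;
}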
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 000000000000..c313722427db
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..3d15708c35b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMBlazeDesc
+ MBlazeMCTargetDesc.cpp
+ MBlazeMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
new file mode 100644
index 000000000000..0d88466bb300
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,24 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ IsLittleEndian = false;
+ StackGrowsUp = false;
+ SupportsDebugInformation = true;
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ GPRel32Directive = "\t.gpword\t";
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
new file mode 100644
index 000000000000..e68dd58b016b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MBlazeMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 000000000000..20d6c0bd2156
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,65 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "MBlazeMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMBlazeMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+ RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 000000000000..b14772ef060b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMBlazeTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..71075ffbf47c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 000000000000..16e01dbfdeed
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..f5458d59a821
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+ MSP430InstPrinter.cpp
+ )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 000000000000..e10d4fe7ca16
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '#' << *Op.getExpr();
+ }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+ // Print displacement first
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Base.getReg())
+ O << '&';
+
+ if (Disp.isExpr())
+ O << *Disp.getExpr();
+ else {
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ O << Disp.getImm();
+ }
+
+ // Print register base field
+ if (Base.getReg())
+ O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CC = MI->getOperand(OpNo).getImm();
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CC code");
+ break;
+ case MSP430CC::COND_E:
+ O << "eq";
+ break;
+ case MSP430CC::COND_NE:
+ O << "ne";
+ break;
+ case MSP430CC::COND_HS:
+ O << "hs";
+ break;
+ case MSP430CC::COND_LO:
+ O << "lo";
+ break;
+ case MSP430CC::COND_GE:
+ O << "ge";
+ break;
+ case MSP430CC::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 000000000000..50d98b7c41fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MSP430InstPrinter : public MCInstPrinter {
+ public:
+ MSP430InstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
new file mode 100644
index 000000000000..a5293ab8a234
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..0f3ebd303924
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMSP430Desc
+ MSP430MCTargetDesc.cpp
+ MSP430MCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
new file mode 100644
index 000000000000..ad7d380b5631
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -0,0 +1,28 @@
+//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCAsmInfo.h"
+using namespace llvm;
+
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = 2;
+
+ PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  PCSymbol = ".";
+ CommentString = ";";
+
+ AlignmentIsInBytes = false;
+ AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
new file mode 100644
index 000000000000..f3138a22022d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct MSP430MCAsmInfo : public MCAsmInfo {
+ explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 000000000000..43a704d7a7df
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "MSP430MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMSP430MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 000000000000..0d8a6bdb44f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..bb857998eef9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
new file mode 100644
index 000000000000..4574ce5f98b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,47 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace MSP430CC {
+ // MSP430 specific condition code.
+ enum CondCodes {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID = -1
+ };
+}
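+// The instruction printer emits these as the suffixes "eq", "ne", "hs", "lo",
+// "ge" and "l", e.g. COND_E becomes a "jeq" and COND_GE a "jge" in the
+// generated assembly.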
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createMSP430BranchSelectionPass();
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
new file mode 100644
index 000000000000..5cc5e6e3d7c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,66 @@
+//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo;
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..2042056617ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,179 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Not implemented yet!");
+ case MachineOperand::MO_Register:
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ uint64_t Offset = MO.getOffset();
+
+ // If the global address expression is a part of displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
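+    // With a nonzero offset the symbol is wrapped as e.g. "#(2+foo)" or
+    // "&(2+foo)"; the "nohash" modifier suppresses the prefix entirely.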
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << (isMemOp ? '&' : '#');
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ O << *Mang->getSymbol(MO.getGlobal());
+
+ if (Offset)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ O << (isMemOp ? '&' : '#');
+ O << MAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ // Print displacement first
+
+  // Imm here is in fact a global address - print the extra modifier.
+ if (Disp.isImm() && !Base.getReg())
+ O << '&';
+ printOperand(MI, OpNum+1, O, "nohash");
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ return true; // Unknown modifier.
+ }
+ printSrcMemOperand(MI, OpNo, O);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+ RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
new file mode 100644
index 000000000000..bd644435c76f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -0,0 +1,180 @@
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 10 bits of displacement to reach their
+// target basic block. It does this in two passes: a basic block position
+// calculation pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-branch-select"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct MSP430BSel : public MachineFunctionPass {
+ static char ID;
+ MSP430BSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "MSP430 Branch Selector";
+ }
+ };
+ char MSP430BSel::ID = 0;
+}
+
+/// createMSP430BranchSelectionPass - returns an instance of the Branch
+/// Selection Pass
+///
+FunctionPass *llvm::createMSP430BranchSelectionPass() {
+ return new MSP430BSel();
+}
+
+bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+  // If the entire function fits within the range of a branch displacement
+  // field, we know we don't need to expand any branches in this function.
+  // This is a common case.
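+  // (A conditional branch has a 10-bit signed displacement, so anything under
+  // 1 << 9 = 512 bytes is trivially in range.)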
+ if (FuncSize < (1 << 9)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+6
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
+ I->getOpcode() != MSP430::JMP) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(0).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<10>(BranchSize)) {
+ MBBStartOffset += 2;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ unsigned NewSize;
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == MSP430::JMP) {
+ NewSize = 4;
+ } else {
+ // The BCC operands are:
+ // 0. MSP430 branch predicate
+ // 1. Target MBB
+ SmallVector<MachineOperand, 1> Cond;
+ Cond.push_back(I->getOperand(1));
+
+ // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
+ TII->ReverseBranchCondition(Cond);
+ BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
+ .addImm(4).addOperand(Cond[0]);
+
+ NewSize = 6;
+ }
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is NewSize bytes, increase the size of the
+ // block by NewSize-2, remember to iterate.
+ BlockSizes[MBB.getNumber()] += NewSize-2;
+ MBBStartOffset += NewSize;
+
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 000000000000..ad27cc9122a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+ // i8 are returned in registers R15B, R14B, R13B, R12B
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+ // i16 are returned in registers R15, R14, R13, R12
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
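+
+// For example, in a non-vararg call the first four i16 arguments are passed in
+// R15W, R14W, R13W and R12W; any further i16 argument goes on the stack in a
+// 2-byte slot with 2-byte alignment.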
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 000000000000..c99f4ab6c2f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+    // Get the offset of the stack slot for the FPW register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+// FIXME: Can we eliminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
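+  // Each callee-saved register is 16 bits wide, so each spill occupies one
+  // 2-byte stack slot.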
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 000000000000..b636827da7b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+ const MSP430Subtarget &STI;
+
+public:
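+  // Stack grows down, the stack is 2-byte aligned, and the local area starts
+  // at offset -2 from the stack pointer on entry.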
+ explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..5430d433b650
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,492 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else if (BaseType == FrameIndexBase) {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ const MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
+ virtual bool
+ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectIndexedLoad(SDNode *Op);
+ SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+
+/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::ADD: {
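+    // Try folding the operands into the addressing mode in either order; if
+    // neither order matches completely, fall back to a plain register base.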
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it can pattern-match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
+ MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, 0/*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
+
+ return true;
+}
+
+bool MSP430DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
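+/// isValidIndexedLoad - Return true if LD is a non-extending post-incremented
+/// load whose increment matches the width of the loaded value (1 for i8,
+/// 2 for i16).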
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
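+/// SelectIndexedBinOp - Try to fold the post-incremented load N1 into the
+/// binary operation Op, selecting the 8- or 16-bit reg-mem form (Opc8/Opc16)
+/// and transferring the chain and writeback results to the new node.
+/// Returns the selected node, or NULL if the load cannot be folded.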
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalToFold(N1, Op, Op, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op, Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
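+/// Select - Main instruction selection hook. Frame indices, post-incremented
+/// loads, and ADD/SUB/AND/OR/XOR nodes that can fold such a load are handled
+/// here; everything else falls through to the TableGen-generated SelectCode.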
+SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== ";
+ Node->dump(CurDAG);
+ errs() << "\n");
+ return NULL;
+ }
+
+  // A few nodes require custom selection.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ assert(Node->getValueType(0) == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 000000000000..0a3eab1f7a66
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,1197 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ TD = getTargetData();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
+ addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(Sched::Latency);
+
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // FIXME: Implement multiplication by a constant efficiently
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+  // Libcall names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
+
+ setMinFunctionAlignment(1);
+ setPrefFunctionAlignment(2);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+MSP430TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, MSP430::GR8RegisterClass);
+
+ return std::make_pair(0U, MSP430::GR16RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue
+MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ else {
+ report_fatal_error("ISRs cannot have arguments");
+ return SDValue();
+ }
+ }
+}
+
+SDValue
+MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // MSP430 target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ report_fatal_error("ISRs cannot be called directly");
+ return SDValue();
+ }
+}
+
+/// LowerCCCArguments - Transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+
+ assert(!isVarArg && "Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i16:
+ unsigned VReg =
+ RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
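+/// LowerReturn - Copy the return values into the registers assigned by
+/// RetCC_MSP430 and emit a RET_FLAG node (RETI_FLAG for interrupt service
+/// routines) glued to those copies.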
+SDValue
+MSP430TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ report_fatal_error("ISRs cannot return any value");
+ return SDValue();
+ }
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+    // Guarantee that all emitted copies are stuck together by passing the
+    // glue value along, so nothing can be scheduled between them.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
+ if (Flag.getNode())
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(Opc, dl, MVT::Other, Chain);
+}
+
+/// LowerCCCCallTo - Function arguments are copied from virtual registers to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are kept in the RegsToPass
+    // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
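+/// LowerShifts - Lower SHL/SRA/SRL. Shifts by a non-constant amount become
+/// target shift nodes (expanded into loops later); shifts by a constant are
+/// expanded into a sequence of single-bit shift operations.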
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ EVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand non-constant shifts to loops:
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ switch (Opc) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+  // Expand the constant shift into a sequence of single-bit shifts.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+    // Special-case the first step of a logical right shift:
+    // srl A, 1 => clrc; rrc A
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
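+/// EmitCMP - Map the ISD condition code CC onto an MSP430 condition code,
+/// swapping or adjusting LHS/RHS where that lets a constant be folded into
+/// the comparison, and emit the MSP430ISD::CMP node that produces the flag.
+/// The chosen target condition code is returned through TargetCC.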
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID;
+ switch (CC) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ TCC = MSP430CC::COND_E; // aka COND_Z
+ // Minor optimization: if LHS is a constant, swap operands, then the
+    // constant can be folded into the comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETNE:
+ TCC = MSP430CC::COND_NE; // aka COND_NZ
+ // Minor optimization: if LHS is a constant, swap operands, then the
+    // constant can be folded into the comparison.
+ if (LHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+    // Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_LO;
+ break;
+ }
+ TCC = MSP430CC::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+    // Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_HS;
+ break;
+ }
+ TCC = MSP430CC::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+    // Turn lhs >= rhs with lhs constant into rhs < lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+ TCC = MSP430CC::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+    // Turn lhs < rhs with lhs constant into rhs >= lhs+1, which allows the
+    // constant to be folded into the instruction.
+ if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
+ LHS = RHS;
+ RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ TCC = MSP430CC::COND_GE;
+ break;
+ }
+ TCC = MSP430CC::COND_L;
+ break;
+ }
+
+ TargetCC = DAG.getConstant(TCC, MVT::i8);
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
+}
+
+
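+/// LowerBR_CC - Lower a conditional branch: emit the comparison via EmitCMP
+/// and branch on the resulting target condition code and flag.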
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain, Dest, TargetCC, Flag);
+}
+
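+/// LowerSETCC - Materialize a boolean result. When the condition can be read
+/// straight from the status register the result is computed by shifting,
+/// masking and possibly inverting SRW; otherwise a SELECT_CC of 1 and 0 on
+/// the target condition code is emitted.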
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ // FIXME: since we're doing a post-processing, use a pseudoinstr here, so
+ // lowering & isel wouldn't diverge.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  // Get the condition codes directly from the status register, if it's easy.
+ // Otherwise a branch will be generated. Note that the AND and BIT
+ // instructions generate different flags than CMP, the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ Invert = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ Shift = true;
+ // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
+ // Res = (SRW >> 1) & 1 is 1 word shorter.
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+      // FIXME: somewhere this is turned into a SRL; lower it in an MSP430-specific way?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
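+/// LowerSELECT_CC - Lower a select into a target SELECT_CC node carrying the
+/// true/false values, the target condition code and the comparison flag.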
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Val = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+SDValue
+MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+/// getPostIndexedAddressParts - Returns true if this node can be combined
+/// with a load / store to form a post-indexed load / store, and if so sets
+/// the base pointer, offset and addressing mode by reference.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
+ }
+}
+
+bool MSP430TargetLowering::isTruncateFree(const Type *Ty1,
+ const Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
+}
+
+bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+
+ return (VT1.getSizeInBits() > VT2.getSizeInBits());
+}
+
+bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
+}
+
+bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
+ return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
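+/// EmitShiftInstr - Expand a Shl/Sra/Srl pseudo instruction into a small CFG:
+/// a loop block that shifts by one bit and decrements the count, and a
+/// remainder block that joins the original and shifted values with a PHI.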
+MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+  // Create the loop and remainder blocks.
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the block containing the instructions after the shift.
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
+ .addReg(ShiftAmtSrcReg).addImm(0);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return RemBB;
+}
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB);
+
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 000000000000..bd660a0bb039
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,182 @@
+//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+      /// is the block to branch to if the condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+      /// SELECT_CC - Operands 0 and 1 are the selection variables, operand 2
+      /// is the condition code and operand 3 is the flag operand.
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of type
+    /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate an i16 value in
+ /// register R15W to i8 by referencing its sub-register R15B.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a value
+    /// of type Ty1 implicitly zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in unknown
+ /// ways, such as incoming arguments, or copies from unknown virtual
+ /// registers. Also, if isTruncateFree(Ty2, Ty1) is true, this does not
+ /// necessarily apply to truncate instructions. e.g. on msp430, all
+    /// instructions that define 8-bit values implicitly zero-extend the result
+ /// out to 16 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ const MSP430Subtarget &Subtarget;
+ const MSP430TargetMachine &TM;
+ const TargetData *TD;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 000000000000..73aef1facc0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,211 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MSP430 instruction formats here.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def SingleOpFrm : Format<1>;
+def DoubleOpFrm : Format<2>;
+def CondJumpFrm : Format<3>;
+
+class SourceMode<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def SrcReg : SourceMode<0>;
+def SrcMem : SourceMode<1>;
+def SrcIndReg : SourceMode<2>;
+def SrcPostInc : SourceMode<3>;
+def SrcImm : SourceMode<3>;
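+// Note: an immediate operand is encoded as a post-increment read through the
+// program counter (@PC+), which is why SrcImm shares its value with SrcPostInc.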
+
+class DestMode<bit val> {
+ bit Value = val;
+}
+
+def DstReg : DestMode<0>;
+def DstMem : DestMode<1>;
+
+class SizeVal<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def SizeUnknown : SizeVal<0>; // Unknown / unset size
+def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
+def Size2Bytes : SizeVal<2>;
+def Size4Bytes : SizeVal<3>;
+def Size6Bytes : SizeVal<4>;
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
+ string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ Format Form = f;
+ SizeVal Sz = sz;
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
+ let TSFlags{1-0} = Form.Value;
+ let TSFlags{4-2} = Sz.Value;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ DestMode ad = dest;
+ SourceMode as = src;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad.Value;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IForm instructions
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class I8rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I8ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I8mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I8mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IForm instructions
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class I16rr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class I16ri<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16rm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mr<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class I16mi<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+
+class I16mm<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ let Pattern = pattern;
+
+ SourceMode as = src;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = as.Value;
+}
+
+// 8 bit IIForm instructions
+class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+
+class II8r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II8m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II8i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// 16 bit IIForm instructions
+class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+
+class II16r<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+
+class II16m<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+
+class II16i<bits<9> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 000000000000..846d09361b33
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,336 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
+ else
+ llvm_unreachable("Cannot load this register from stack slot!");
+}
+
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != MSP430::JMP &&
+ I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Br &&
+ I->getOpcode() != MSP430::Bm)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+bool MSP430InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+ MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
+
+ switch (CC) {
+ default:
+ assert(0 && "Invalid branch condition!");
+ break;
+ case MSP430CC::COND_E:
+ CC = MSP430CC::COND_NE;
+ break;
+ case MSP430CC::COND_NE:
+ CC = MSP430CC::COND_E;
+ break;
+ case MSP430CC::COND_L:
+ CC = MSP430CC::COND_GE;
+ break;
+ case MSP430CC::COND_GE:
+ CC = MSP430CC::COND_L;
+ break;
+ case MSP430CC::COND_HS:
+ CC = MSP430CC::COND_LO;
+ break;
+ case MSP430CC::COND_LO:
+ CC = MSP430CC::COND_HS;
+ break;
+ }
+
+ Cond[0].setImm(CC);
+ return false;
+}
+
+bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Cannot handle indirect branches.
+ if (I->getOpcode() == MSP430::Br ||
+ I->getOpcode() == MSP430::Bm)
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == MSP430::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
+ MSP430CC::CondCodes BranchCode =
+ static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
+ if (BranchCode == MSP430CC::COND_INVALID)
+ return true; // Can't handle weird stuff.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Bail out if this branch targets a different destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ BuildMI(&MBB, DL, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// GetInstSizeInBytes - Return the maximum number of bytes of code the
+/// specified instruction may occupy.
+///
+unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MCInstrDesc &Desc = MI->getDesc();
+
+ switch (Desc.TSFlags & MSP430II::SizeMask) {
+ default:
+ switch (Desc.getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
+ }
+ }
+ case MSP430II::SizeSpecial:
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Unknown instruction size!");
+ case MSP430::SAR8r1c:
+ case MSP430::SAR16r1c:
+ return 4;
+ }
+ case MSP430II::Size2Bytes:
+ return 2;
+ case MSP430II::Size4Bytes:
+ return 4;
+ case MSP430II::Size6Bytes:
+ return 6;
+ }
+
+ return 6;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 000000000000..90013f5c2e70
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,92 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+/// MSP430II - This namespace holds all of the target-specific flags that
+/// instruction info tracks.
+///
+namespace MSP430II {
+ enum {
+ SizeShift = 2,
+ SizeMask = 7 << SizeShift,
+
+ SizeUnknown = 0 << SizeShift,
+ SizeSpecial = 1 << SizeShift,
+ Size2Bytes = 2 << SizeShift,
+ Size4Bytes = 3 << SizeShift,
+ Size6Bytes = 4 << SizeShift
+ };
+}
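+// For example, MSP430InstrInfo::GetInstSizeInBytes() recovers the size class
+// of an instruction with (Desc.TSFlags & MSP430II::SizeMask) and switches on
+// the result.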
+
+class MSP430InstrInfo : public MSP430GenInstrInfo {
+ const MSP430RegisterInfo RI;
+ MSP430TargetMachine &TM;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of register info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ // Branch folding goodness
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 000000000000..59cb59873ab7
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,1211 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInGlue]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInGlue]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Short jump targets have OtherVT type and are printed as pcrel imm values.
+def jmptarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
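+// SelectAddr is implemented by the target's DAG-to-DAG instruction selector
+// (presumably MSP430ISelDAGToDAG.cpp in this import) and matches the
+// (base register, 16-bit displacement) pair used by memsrc/memdst above.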
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
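+// The single-use restriction lets the AND be folded into the BIT patterns
+// further below: BIT only sets SRW and discards the result, which is only
+// safe when no other user still needs the AND's value.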
+//===----------------------------------------------------------------------===//
+// Instruction list...
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
+}
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// FIXME: expand opcode & cond field for branches!
+
+// Direct branch
+let isBarrier = 1 in {
+ // Short branch
+ def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+ let isIndirectBranch = 1 in {
+ // Long branches
+ def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
+ "br\t$brdst",
+ [(brind tblockaddress:$brdst)]>;
+ def Br : I16rr<0, (outs), (ins GR16:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind GR16:$brdst)]>;
+ def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
+ "mov.w\t{$brdst, pc}",
+ [(brind (load addr:$brdst))]>;
+ }
+}
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : CJForm<0, 0,
+ (outs), (ins jmptarget:$dst, cc:$cc),
+ "j$cc\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : II16i<0x0,
+ (outs), (ins i16imm:$dst, variable_ops),
+ "call\t$dst", [(MSP430call imm:$dst)]>;
+ def CALLr : II16r<0x0,
+ (outs), (ins GR16:$dst, variable_ops),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : II16m<0x0,
+ (outs), (ins memsrc:$dst, variable_ops),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : II16r<0x0,
+ (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : I8rr<0x0,
+ (outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : I8rm<0x0,
+ (outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
+// Any instruction that defines an 8-bit result zero-extends the value into
+// the full 16-bit register. Truncate can be lowered to EXTRACT_SUBREG, and
+// CopyFromReg may be copying from a truncate, so neither guarantees this, but
+// any other 8-bit operation will zero-extend up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of an 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
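+// SUBREG_TO_REG asserts that the rest of the 16-bit register is already zero,
+// so no explicit zero-extension instruction needs to be emitted here.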
+
+def MOV8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+def MOV8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 (load addr:$src)), addr:$dst)]>;
+def MOV16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 (load addr:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Constraints = "$src = $dst" in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+
+def ADD8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
+def ADD8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADD8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def ADC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def AND8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
+def OR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
+}
+
+def OR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
+def OR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
+
+def OR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
+def OR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def OR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
+def BIC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
+
+def BIC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
+
+let Constraints = "" in {
+def BIC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst),
+ (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def XOR8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src = $dst" in {
+def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR8:$dst, GR16:$base_wb),
+ (ins GR8:$src, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
+ (outs GR16:$dst, GR16:$base_wb),
+ (ins GR16:$src, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
+let Constraints = "" in {
+def SUB8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : I8ri<0x0,
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : I16ri<0x0,
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : I8rm<0x0,
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : I16rm<0x0,
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let Constraints = "" in {
+def SBC8mr : I8mr<0x0,
+ (outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : I16mr<0x0,
+ (outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : I8mi<0x0,
+ (outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : I16mi<0x0,
+ (outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : I8mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : I16mm<0x0,
+ (outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: memory variant!
+def SAR8r1 : II8r<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : I8rr<0x0,
+ (outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : I16rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
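+// "clrc" followed by "rrc" yields a logical shift right by one: the cleared
+// carry bit is rotated into the MSB while the old LSB drops into carry.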
+
+// FIXME: Memory sext's ?
+def SEXT16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def ZEXT16r : I8rr<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
+// FIXME: Memory byte swaps?
+def SWPB16r : II16r<0x0,
+ (outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+} // Constraints = "$src = $dst"
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+def CMP16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+
+def CMP8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i8 imm:$src2)), (implicit SRW)]>;
+def CMP16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
+ (i16 imm:$src2)), (implicit SRW)]>;
+
+def CMP8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+def CMP16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
+ (implicit SRW)]>;
+
+def CMP8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
+ (implicit SRW)]>;
+def CMP16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
+ (implicit SRW)]>;
+
+
+// BIT TESTS: these only set the condition codes.
+// Note that the C condition is set differently from when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0x0,
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16rr : I16rr<0x0,
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
+ (implicit SRW)]>;
+}
+def BIT8ri : I8ri<0x0,
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+def BIT16ri : I16ri<0x0,
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8rm : I8rm<0x0,
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16rm : I16rm<0x0,
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mr : I8mr<0x0,
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
+ (implicit SRW)]>;
+def BIT16mr : I16mr<0x0,
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
+ (implicit SRW)]>;
+
+def BIT8mi : I8mi<0x0,
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+ (implicit SRW)]>;
+def BIT16mi : I16mi<0x0,
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+ (implicit SRW)]>;
+
+def BIT8mm : I8mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+def BIT16mm : I16mm<0x0,
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
+ (load addr:$src2)),
+ 0),
+ (implicit SRW)]>;
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
+
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tblockaddress:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 000000000000..d1d9a1158635
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,150 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
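+
+// For example, a global-address operand referring to @foo with an offset of 4
+// lowers to the expression "foo+4"; jump-table operands have no offset field,
+// hence the !MO.isJTI() guard above.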
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
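+
+// Typically the MSP430 asm printer drives this: it calls Lower() once per
+// MachineInstr and hands the resulting MCInst to the MC streamer for
+// printing or encoding.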
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 000000000000..e937696406fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class AsmPrinter;
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+
+ /// MSP430MCInstLower - This class is used to lower a MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+ MCContext &Ctx;
+ Mangler &Mang;
+
+ AsmPrinter &Printer;
+public:
+ MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ : Ctx(ctx), Mang(mang), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 000000000000..383fd2e9821c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===- MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private MSP430 target-specific information for each
+/// MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 000000000000..1cc60bba3a55
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,254 @@
+//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const Function* F = MF->getFunction();
+ static const unsigned CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+ static const unsigned CalleeSavedRegsIntrFP[] = {
+ MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
+
+ if (TFI->hasFP(*MF))
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
+ else
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
+
+}
+
+BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Mark 4 special registers with subregisters as reserved.
+ Reserved.set(MSP430::PCB);
+ Reserved.set(MSP430::SPB);
+ Reserved.set(MSP430::SRB);
+ Reserved.set(MSP430::CGB);
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (TFI->hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &MSP430::GR16RegClass;
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackup instruction into an 'add SPW, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
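+ // (E.g. with a 2-byte stack alignment, an Amount of 5 rounds up to 6.)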
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!TFI->hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(i+1).getImm();
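+ // For instance, with a frame pointer, an object whose MachineFrameInfo
+ // offset is -6 ends up at -6 + 2 (saved PC) + 2 (saved FPW) = -2, i.e. it
+ // is addressed as -2(r4).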
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+ // This is actually "load effective address" of the stack slot
+ // instruction. We have only two-address instructions, thus we need to
+ // expand it into mov + add
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+void
+MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the FPW register that must be saved.
+ if (TFI->hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
+
+unsigned MSP430RegisterInfo::getRARegister() const {
+ return MSP430::PCW;
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
+
+int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("Not implemented yet!");
+ return 0;
+}
+
+int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("Not implemented yet!");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 000000000000..fb70594ab37c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,71 @@
+//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
+
+ const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const {
+ // No sub-classes makes this really easy.
+ return A;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 000000000000..d1c2e3f7915c
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,85 @@
+//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+}
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB)>;
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW)>
+{
+ let SubRegClasses = [(GR8 subreg_8bit)];
+}
+
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..24f45fa3881b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-selectiondag-info"
+#include "MSP430TargetMachine.h"
+using namespace llvm;
+
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
new file mode 100644
index 000000000000..fa8194830ff8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430SELECTIONDAGINFO_H
+#define MSP430SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ ~MSP430SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 000000000000..b58c50afb982
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,32 @@
+//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS) :
+ MSP430GenSubtargetInfo(TT, CPU, FS) {
+ std::string CPUName = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 000000000000..1ce5f11fe1bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,42 @@
+//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ bool ExtendedInsts;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the feature string, setting the specified
+ /// subtarget options. The definition of this function is auto-generated by
+ /// tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 000000000000..971f512141e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,51 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMSP430Target() {
+ // Register the target.
+ RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+}
+
+MSP430TargetMachine::MSP430TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ // FIXME: Check TargetData string.
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) { }
+
+
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createMSP430ISelDag(*this, OptLevel));
+ return false;
+}
+
+bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createMSP430BranchSelectionPass());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 000000000000..2a9eea0bcd82
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,69 @@
+//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430FrameLowering.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+ MSP430FrameLowering FrameLowering;
+
+public:
+ MSP430TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MSP430TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
new file mode 100644
index 000000000000..f9ca5c49c979
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMSP430Target;
+
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430>
+ X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+}
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
new file mode 100644
index 000000000000..46c687b64001
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -0,0 +1,235 @@
+//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for assembly backends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/Mangler.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+static bool isAcceptableChar(char C, bool AllowPeriod) {
+ if ((C < 'a' || C > 'z') &&
+ (C < 'A' || C > 'Z') &&
+ (C < '0' || C > '9') &&
+ C != '_' && C != '$' && C != '@' &&
+ !(AllowPeriod && C == '.'))
+ return false;
+ return true;
+}
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
+ OutName.push_back('_');
+ OutName.push_back(HexDigit(C >> 4));
+ OutName.push_back(HexDigit(C & 15));
+ OutName.push_back('_');
+}
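+
+// For example, MangleLetter(OutName, '"') appends "_22_", since '"' is 0x22.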
+
+/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
+ assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+ // If the first character is a number and the target does not allow this, we
+ // need quotes.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
+ return true;
+
+ // If any of the characters in the string is an unacceptable character, force
+ // quotes.
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ return true;
+ return false;
+}
+
+/// appendMangledName - Add the specified string in mangled form if it uses
+/// any unusual characters.
+static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
+ const MCAsmInfo &MAI) {
+ // The first character is not allowed to be a number unless the target
+ // explicitly allows it.
+ if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ MangleLetter(OutName, Str[0]);
+ Str = Str.substr(1);
+ }
+
+ bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (!isAcceptableChar(Str[i], AllowPeriod))
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// appendMangledQuotedName - On systems that support quoted symbols, we still
+/// have to escape some (obscure) characters like " and \n which would break the
+/// assembler's lexing.
+static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
+ StringRef Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (Str[i] == '"' || Str[i] == '\n')
+ MangleLetter(OutName, Str[i]);
+ else
+ OutName.push_back(Str[i]);
+ }
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified name as the global variable name. GVName must not be
+/// empty.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef Name = GVName.toStringRef(TmpData);
+ assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+
+ // If the global name is not prefixed with \1, add the appropriate prefixes.
+ if (Name[0] == '\1') {
+ Name = Name.substr(1);
+ } else {
+ if (PrefixTy == Mangler::Private) {
+ const char *Prefix = MAI.getPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ } else if (PrefixTy == Mangler::LinkerPrivate) {
+ const char *Prefix = MAI.getLinkerPrivateGlobalPrefix();
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ const char *Prefix = MAI.getGlobalPrefix();
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
+ else
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary length prefix.
+ }
+
+ // If this is a simple string that doesn't need escaping, just append it.
+ if (!NameNeedsEscaping(Name, MAI) ||
+ // If quotes are supported, they can be used unless the string contains
+ // a quote or newline.
+ (MAI.doesAllowQuotesInName() &&
+ Name.find_first_of("\n\"") == StringRef::npos)) {
+ OutName.append(Name.begin(), Name.end());
+ return;
+ }
+
+ // On systems that do not allow quoted names, we need to mangle most
+ // strange characters.
+ if (!MAI.doesAllowQuotesInName())
+ return appendMangledName(OutName, Name, MAI);
+
+ // Okay, the system allows quoted strings. We can quote most anything, the
+ // only characters that need escaping are " and \n.
+ assert(Name.find_first_of("\n\"") != StringRef::npos);
+ return appendMangledQuotedName(OutName, Name);
+}
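+
+// For example, on a target whose global prefix is '_', a global named "foo"
+// becomes "_foo". A name containing a space is either left for the
+// assembler's quoting support or has the space mangled to "_20_", depending
+// on MAI.doesAllowQuotesInName().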
+
+/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
+/// a suffix on their name indicating the number of words of arguments they
+/// take.
+static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
+ const Function *F, const TargetData &TD) {
+ // Calculate arguments size total.
+ unsigned ArgWords = 0;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ const Type *Ty = AI->getType();
+ // 'Dereference' type in case of byval parameter attribute
+ if (AI->hasByValAttr())
+ Ty = cast<PointerType>(Ty)->getElementType();
+ // Size should be aligned to DWORD boundary
+ ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
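+ // For example, a fastcall/stdcall function taking two 32-bit ints ends up
+ // with ArgWords == 8 and hence the suffix "@8".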
+
+ raw_svector_ostream(OutName) << '@' << ArgWords;
+}
+
+
+/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+/// and the specified global variable's name. If the global variable doesn't
+/// have a name, this fills in a unique name for the global.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const GlobalValue *GV,
+ bool isImplicitlyPrivate) {
+ ManglerPrefixTy PrefixTy = Mangler::Default;
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ PrefixTy = Mangler::Private;
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
+ PrefixTy = Mangler::LinkerPrivate;
+
+ // If this global has a name, handle it simply.
+ if (GV->hasName()) {
+ getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+ } else {
+ // Get the ID for the global, assigning a new one if we haven't got one
+ // already.
+ unsigned &ID = AnonGlobalIDs[GV];
+ if (ID == 0) ID = NextAnonGlobalID++;
+
+ // Must mangle the global into a unique ID.
+ getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+ }
+
+ // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
+ // add it.
+ if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ CallingConv::ID CC = F->getCallingConv();
+
+ // fastcall functions need to start with @.
+ // FIXME: This logic seems unlikely to be right.
+ if (CC == CallingConv::X86_FastCall) {
+ if (OutName[0] == '_')
+ OutName[0] = '@';
+ else
+ OutName.insert(OutName.begin(), '@');
+ }
+
+ // fastcall and stdcall functions usually need @42 at the end to specify
+ // the argument info.
+ const FunctionType *FT = F->getFunctionType();
+ if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ // "Pure" variadic functions do not receive @0 suffix.
+ (!FT->isVarArg() || FT->getNumParams() == 0 ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr())))
+ AddFastCallStdCallSuffix(OutName, F, TD);
+ }
+ }
+}
+
+/// getSymbol - Return the MCSymbol for the specified global value. This
+/// symbol is the main label that is the address of the global.
+MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
+ SmallString<60> NameStr;
+ getNameWithPrefix(NameStr, GV, false);
+ return Context.GetOrCreateSymbol(NameStr.str());
+}
+
+
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..8852fd4126e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmPrinter
+ MipsInstPrinter.cpp
+ )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
new file mode 100644
index 000000000000..63e38ef3e6aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include the 'main' Mips target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 000000000000..41c1dd3919b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,127 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+ switch (CC) {
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
+ case FCOND_UEQ:
+ case FCOND_ONE: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ llvm_unreachable("Impossible condition code!");
+}
+
+StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '$' << LowercaseString(getRegisterName(RegNo));
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // For PIC targets, the call target is loaded with a pattern such as
+ //   lw $25, %call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
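+
+// E.g. an operand pair (register $sp, immediate 16) is printed as "16($sp)".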
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+ // When a stack location is used by a non-load/store instruction, print it
+ // the same way as a normal three-operand instruction.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ const MCOperand& MO = MI->getOperand(opNum);
+ O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 000000000000..680208eb819b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,100 @@
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_OEQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the ones above,
+ // but are used with a branch-on-false.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_UNE,
+ FCOND_ONE,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+ MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getInstructionName(unsigned Opcode);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..97de75db5347
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMipsDesc
+ MipsMCTargetDesc.cpp
+ MipsMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..7fe2086a6e00
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
new file mode 100644
index 000000000000..5d9242500f6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -0,0 +1,38 @@
+//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::mips)
+ IsLittleEndian = false;
+
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.2byte\t";
+ Data32bitsDirective = "\t.4byte\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ WeakRefDirective = "\t.weak\t";
+
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ HasLEB128 = true;
+ DwarfRegNumForCFI = true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
new file mode 100644
index 000000000000..41b719207b7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class MipsMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MipsMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 000000000000..06f0d0bfb6b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCTargetDesc.h"
+#include "MipsMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMipsMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMipsMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+ createMipsMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMipsMCAsmInfo() {
+ RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
+ RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 000000000000..3d18f114c8bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
new file mode 100644
index 000000000000..984b5adfc5f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -0,0 +1,34 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
new file mode 100644
index 000000000000..433cd57f34e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -0,0 +1,101 @@
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
+ "Enable 'multiply add/sub' instructions.">;
+def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
+ "Enable 'min/max' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips1 ISA Support">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips2 ISA Support">;
+def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+ "Mips32 ISA Support",
+ [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
+ "Mips32r2", "Mips32r2 ISA Support",
+ [FeatureMips32, FeatureSEInReg]>;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"r2000", [FeatureMips1]>;
+def : Proc<"r3000", [FeatureMips1]>;
+
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"r6000", [FeatureMips2]>;
+
+def : Proc<"4ke", [FeatureMips32r2]>;
+
+// Allegrex is a 32-bit subset of r4000, both for integer and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some of the
+// Mips32/Mips32r2 instructions and a custom vector FPU processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+ FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
+ FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+
+def MipsAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+
+ let AssemblyWriters = [MipsAsmWriter];
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 000000000000..69e03bd29724
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,440 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+#include "MipsAsmPrinter.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+using namespace llvm;
+
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ if (MI->isDebugValue()) {
+ PrintDebugValueComment(MI, OS);
+ return;
+ }
+
+ MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ MCInst TmpInst0;
+ MCInstLowering.Lower(MI, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows register 31 (RA) is saved at prologue).
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located (e.g. with a -8
+// offset, see the example below).
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows registers 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned CPUBitmask = 0, FPUBitmask = 0;
+ int CPUTopSavedRegOff, FPUTopSavedRegOff;
+
+ // Set the CPU and FPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ // size of stack area to which FP callee-saved regs are saved.
+ unsigned CPURegSize = Mips::CPURegsRegisterClass->getSize();
+ unsigned FGR32RegSize = Mips::FGR32RegisterClass->getSize();
+ unsigned AFGR64RegSize = Mips::AFGR64RegisterClass->getSize();
+ bool HasAFGR64Reg = false;
+ unsigned CSFPRegsSize = 0;
+ unsigned i, e = CSI.size();
+
+ // Set FPU Bitmask.
+ for (i = 0; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
+ break;
+
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ FPUBitmask |= (3 << RegNum);
+ CSFPRegsSize += AFGR64RegSize;
+ HasAFGR64Reg = true;
+ continue;
+ }
+
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ }
+
+ // Set CPU Bitmask.
+ for (; i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // FP Regs are saved right below where the virtual frame pointer points to.
+ FPUTopSavedRegOff = FPUBitmask ?
+ (HasAFGR64Reg ? -AFGR64RegSize : -FGR32RegSize) : 0;
+
+ // CPU Regs are saved below FP Regs.
+ CPUTopSavedRegOff = CPUBitmask ? -CSFPRegsSize - CPURegSize : 0;
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask, O);
+ O << ',' << CPUTopSavedRegOff << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask, O);
+ O << "," << FPUTopSavedRegOff << '\n';
+}
+
+// Print a 32-bit hex number, padded with leading zeros to eight digits.
+void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
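+
+// A brief illustration of the zero padding above: printHex32(5, O) emits
+// "0x00000005" and printHex32(0x80000001, O) emits "0x80000001".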
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ "," + Twine(stackSize) + ",$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
+}
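+
+// As a sketch of the output: for a function with a 40-byte frame and no
+// dedicated frame pointer, the directive emitted above would read
+// .frame $sp,40,$ra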
+
+/// getCurrentABIString - Return the ABI string used in the .mdebug section name.
+const char *MipsAsmPrinter::getCurrentABIString() const {
+ switch (Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::O64: return "abiO64";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: break;
+ }
+
+ llvm_unreachable("Unknown Mips ABI");
+ return NULL;
+}
+
+void MipsAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyStart() {
+ emitFrameDirective();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyEnd() {
+ // There are instructions for these macros, but they must
+ // always be at the function end, and we can't emit them
+ // inside the basic block logic.
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const {
+ // If this is a landing pad, it isn't a fall through.  If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // Only dereference the single predecessor after the emptiness check above.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+
+ return !I->getDesc().isBarrier();
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ return false;
+}
+
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ bool closeP = false;
+
+ if (MO.getTargetFlags())
+ closeP = true;
+
+ switch(MO.getTargetFlags()) {
+ case MipsII::MO_GPREL: O << "%gp_rel("; break;
+ case MipsII::MO_GOT_CALL: O << "%call16("; break;
+ case MipsII::MO_GOT: O << "%got("; break;
+ case MipsII::MO_ABS_HI: O << "%hi("; break;
+ case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_TLSGD: O << "%tlsgd("; break;
+ case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
+ case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
+ case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ }
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << '$'
+ << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ if (closeP) O << ")";
+}
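+
+// A small example of the flag handling above: a global-address operand for a
+// symbol foo carrying the MipsII::MO_ABS_HI flag is printed as "%hi(foo)"
+// (assuming the mangler adds no prefix), while a plain immediate 16 is
+// printed simply as "16".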
+
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // Load/Store memory operands -- imm($reg)
+ // On PIC targets the address is loaded with a
+ // pattern like lw $25,%call16($28)
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
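+
+// For example, a load whose memory operands are an offset of 8 and a base
+// register $sp is printed as "8($sp)".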
+
+void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ // When using stack locations for non-load/store instructions,
+ // print them the same way as normal 3-operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ // FIXME: Use SwitchSection.
+
+ // Tell the assembler which ABI we are using
+ OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+
+ // TODO: handle O64 ABI
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
+
+ // return to previous section
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+}
+
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ // TODO: implement
+}
+
+// Force static initialization.
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new MipsInstPrinter(MAI);
+}
+
+extern "C" void LLVMInitializeMipsAsmPrinter() {
+ RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 000000000000..16461ff1fbb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,71 @@
+//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class raw_ostream;
+class MachineBasicBlock;
+class Module;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+
+public:
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ void EmitInstruction(const MachineInstr *MI);
+ void printSavedRegsBitmask(raw_ostream &O);
+ void printHex32(unsigned int Value, raw_ostream &O);
+ void emitFrameDirective();
+ const char *getCurrentABIString() const;
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void EmitStartOfAsmFile(Module &M);
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 000000000000..876f0fcc83ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,86 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D1
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+ // i32 and f32 values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // f64 values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
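+
+// To illustrate the integer rules above: the first eight i32 arguments are
+// passed in A0-A3 and T0-T3, and any further i32 argument is placed in a
+// 4-byte aligned stack slot.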
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 000000000000..c3a6211399cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,82 @@
+//===-- MipsDelaySlotFiller.cpp - Mips delay slot filler ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ const MCInstrDesc& MCid = I->getDesc();
+ if (MCid.hasDelaySlot() &&
+ (TM.getSubtarget<MipsSubtarget>().isMips1() ||
+ MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
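+
+// As an illustration of the transformation above: a block ending in
+// jal foo
+// is rewritten so its delay slot is explicitly filled:
+// jal foo
+// nop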
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
new file mode 100644
index 000000000000..03d922fe7cd6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -0,0 +1,94 @@
+//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass emits instructions that restore $gp right
+// after jalr instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "emit-gp-restore"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct Inserter : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Inserter(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Emit GP Restore";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char Inserter::ID = 0;
+} // end of anonymous namespace
+
+bool Inserter::runOnMachineFunction(MachineFunction &F) {
+ if (TM.getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ bool Changed = false;
+ int FI = F.getInfo<MipsFunctionInfo>()->getGPFI();
+
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI) {
+ MachineBasicBlock& MBB = *MFI;
+ MachineBasicBlock::iterator I = MFI->begin();
+
+ // If MBB is a landing pad, insert an instruction that restores $gp after
+ // the EH_LABEL.
+ if (MBB.isLandingPad()) {
+ // Find EH_LABEL first.
+ for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
+
+ // Insert lw.
+ ++I;
+ DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+
+ while (I != MFI->end()) {
+ if (I->getOpcode() != Mips::JALR) {
+ ++I;
+ continue;
+ }
+
+ DebugLoc dl = I->getDebugLoc();
+ // emit lw $gp, ($gp save slot on stack) after jalr
+ BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// createMipsEmitGPRestorePass - Returns a pass that emits instructions that
+/// restore $gp clobbered by jalr instructions.
+FunctionPass *llvm::createMipsEmitGPRestorePass(MipsTargetMachine &tm) {
+ return new Inserter(tm);
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
new file mode 100644
index 000000000000..a622258a4dcb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,117 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands pseudo instructions into target instructions after register
+// allocation but before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-expand-pseudo"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct MipsExpandPseudo : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ MipsExpandPseudo(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips PseudoInstrs Expansion";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ private:
+ void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
+ void ExpandExtractElementF64(MachineBasicBlock&,
+ MachineBasicBlock::iterator);
+ };
+ char MipsExpandPseudo::ID = 0;
+} // end of anonymous namespace
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
+ Changed |= runOnMachineBasicBlock(*I);
+
+ return Changed;
+}
+
+bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ const MCInstrDesc& MCid = I->getDesc();
+
+ switch(MCid.getOpcode()) {
+ default:
+ ++I;
+ continue;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, I);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, I);
+ break;
+ }
+
+ // delete original instr
+ MBB.erase(I++);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg =
+ TM.getRegisterInfo()->getSubRegisters(DstReg);
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
+}
+
+void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
+}
+
+/// createMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// instructions into real instructions.
+FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
+ return new MipsExpandPseudo(tm);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 000000000000..a0f90a0d487a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,331 @@
+//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow upwards! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer subtracted/added from the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize+(4*(num_arg-1)))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area variable there is something like: FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load of the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown in LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, in eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
+ || MFI->isFrameAddressTaken();
+}
+
+bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
+ return true;
+}
+
+static unsigned AlignOffset(unsigned Offset, unsigned Align) {
+ return (Offset + Align - 1) / Align * Align;
+}
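+
+// For example, AlignOffset(13, 8) and AlignOffset(16, 8) both return 16.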
+
+// Expand a pair of register and immediate if the immediate doesn't fit in the
+// 16-bit offset field.
+// e.g.
+// if OrigImm = 0x10000, OrigReg = $sp:
+// generate the following sequence of instructions:
+// lui $at, hi(0x10000)
+// addu $at, $sp, $at
+//
+// (NewReg, NewImm) = ($at, lo(0x10000))
+// return true
+static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
+ unsigned& NewReg, int& NewImm,
+ MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ // OrigImm fits in the 16-bit field
+ if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+ NewReg = OrigReg;
+ NewImm = OrigImm;
+ return false;
+ }
+
+ MachineFunction* MF = MBB.getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = I->getDebugLoc();
+ int ImmLo = (short)(OrigImm & 0xffff);
+ int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+ ((OrigImm & 0x8000) != 0);
+
+ // FIXME: change this when mips goes MC.
+ BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+ .addReg(Mips::AT);
+ NewReg = Mips::AT;
+ NewImm = ImmLo;
+
+ return true;
+}
+
+void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed;
+
+ // First, compute final stack size.
+ unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+ unsigned StackAlign = getStackAlignment();
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
+ MipsFI->getMaxCallFrameSize();
+ unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
+ AlignOffset(MFI->getStackSize(), StackAlign);
+
+ // Update stack size
+ MFI->setStackSize(StackSize);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+ // TODO: check need from GP here.
+ if (isPIC && STI.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+ .addReg(RegInfo->getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack: addiu sp, sp, (-imm)
+ ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegisterClass->contains(Reg)) {
+ const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(*SubRegs);
+ MachineLocation SrcML1(*(SubRegs + 1));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ }
+ else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+ // If the frame pointer is enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(Mips::FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+ .addImm(MFI->getObjectOffset(MipsFI->getGPFI()));
+}
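+
+// A rough sketch of the prologue emitted above for an O32 PIC function with
+// a 32-byte frame and no frame pointer (exact directive spelling comes from
+// the asm printer):
+// .set noreorder
+// .cpload $25
+// .set nomacro
+// addiu $sp, $sp, -32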
+
+void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ unsigned StackSize = MFI->getStackSize();
+
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed = false;
+
+ // If the frame pointer is enabled, restore the stack pointer from it.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+ }
+
+ // Adjust stack: insert addiu sp, sp, (imm)
+ if (StackSize) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
+ MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(NewReg).addImm(NewImm);
+
+ // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+ }
+}
+
+void
+MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void MipsFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo& MRI = MF.getRegInfo();
+
+ // FIXME: remove this code if register allocator can correctly mark
+ // $fp and $ra used or unused.
+
+ // Mark $fp and $ra as used or unused.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(Mips::FP);
+
+ // The register allocator might determine $ra is used after seeing
+ // instruction "jr $ra", but we do not want PrologEpilogInserter to insert
+ // instructions to save/restore $ra unless there is a function call.
+ // To correct this, $ra is explicitly marked unused if there is no
+ // function call.
+ if (MF.getFrameInfo()->hasCalls())
+ MRI.setPhysRegUsed(Mips::RA);
+ else
+ MRI.setPhysRegUnused(Mips::RA);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 000000000000..78c78eea5b9a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,50 @@
+//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+ const MipsSubtarget &STI;
+
+public:
+ explicit MipsFrameLowering(const MipsSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ bool targetHandlesStackFrameRounding() const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 000000000000..90aaeb60d06f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,482 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MipsTargetMachine &getTargetMachine() {
+ return static_cast<const MipsTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MipsInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoadFp64(SDNode *N);
+ SDNode *SelectStoreFp64(SDNode *N);
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // On PIC code, load the GA (global address).
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Addr.getOpcode() == MipsISD::WrapperPIC) {
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. For example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
+ Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ SDValue LoVal = Addr.getOperand(1);
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
+ MVT::SimpleValueType NVT =
+ N->getValueType(0).getSimpleVT().SimpleTy;
+
+ if (!Subtarget.isMips1() || NVT != MVT::f64)
+ return NULL;
+
+ LoadSDNode *LN = cast<LoadSDNode>(N);
+ if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+ LN->getAddressingMode() != ISD::UNINDEXED)
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Offset0, Offset1, Base;
+
+ if (!SelectAddr(N1, Base, Offset0) ||
+ N1.getValueType() != MVT::i32)
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ DebugLoc dl = N->getDebugLoc();
+
+ // The second load should start 4 bytes after the first.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+ else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
+ MVT::i32,
+ CP->getAlignment(),
+ CP->getOffset()+4,
+ CP->getTargetFlags());
+ else
+ return NULL;
+
+ // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
+ // Instead of:
+ // ldc $f0, X($3)
+ // Generate:
+ // lwc $f0, X($3)
+ // lwc $f1, X+4($3)
+ SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+ MVT::Other, Base, Offset0, Chain);
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ MVT::f64, Undef, SDValue(LD0, 0));
+
+ SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+ MVT::Other, Base, Offset1, SDValue(LD0, 1));
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ MVT::f64, I0, SDValue(LD1, 0));
+
+ ReplaceUses(SDValue(N, 0), I1);
+ ReplaceUses(SDValue(N, 1), Chain);
+ cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ return I1.getNode();
+}
+
+SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
+
+ if (!Subtarget.isMips1() ||
+ N->getOperand(1).getValueType() != MVT::f64)
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
+ return NULL;
+
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue Offset0, Offset1, Base;
+
+ if (!SelectAddr(N2, Base, Offset0) ||
+ N1.getValueType() != MVT::f64 ||
+ N2.getValueType() != MVT::i32)
+ return NULL;
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the even and odd part from the f64 register
+ SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
+ dl, MVT::f32, N1);
+ SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
+ dl, MVT::f32, N1);
+
+  // The second store starts 4 bytes after the first.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
+ Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
+ else
+ return NULL;
+
+  // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
+ // Instead of:
+ // sdc $f0, X($3)
+ // Generate:
+ // swc $f0, X($3)
+ // swc $f1, X+4($3)
+ SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
+ Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+ MVT::Other, Ops0, 4), 0);
+ cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
+ Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
+ MVT::Other, Ops1, 4), 0);
+ cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ ReplaceUses(SDValue(N, 0), Chain);
+ return Chain.getNode();
+}
+
+/// Select instructions that are not handled by custom lowering. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
+
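+    // MIPS has no add/subtract-with-carry instruction. Materialize the carry
+    // (or borrow) of the first ADDC/SUBC with SLTu, add it to the right-hand
+    // operand, and feed that sum into the final ADDu/SUBu.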
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
+
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
+ LHS, SDValue(AddCarry,0));
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op;
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+
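+    // MULT/MULTu write the full 64-bit product into the HI/LO register pair;
+    // read the two halves back with MFLO and MFHI, glued to the multiply.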
+ SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+
+ SDValue InFlag = SDValue(Mul, 0);
+ SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Glue, InFlag);
+ InFlag = SDValue(Lo,1);
+ SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
+
+ return NULL;
+ }
+
+ /// Special Muls
+ case ISD::MUL:
+ if (Subtarget.isMips32())
+ break;
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ SDValue MulOp1 = Node->getOperand(0);
+ SDValue MulOp2 = Node->getOperand(1);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
+ MVT::Glue, MulOp1, MulOp2);
+
+ SDValue InFlag = SDValue(MulNode, 0);
+
+ if (Opcode == ISD::MUL)
+ return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ else
+ return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ }
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
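+      // Materialize an f64 +0.0 by moving $zero into an FPU register with MTC1
+      // and using that 32-bit value for both the even and odd halves.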
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO, MVT::i32);
+ SDValue Undef = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
+ SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ MVT::f64, Undef, SDValue(MTC, 0));
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ MVT::f64, I0, SDValue(MTC, 0));
+ ReplaceUses(SDValue(Node, 0), I1);
+ return I1.getNode();
+ }
+ break;
+ }
+
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectLoadFp64(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+
+ case ISD::STORE:
+ if (SDNode *ResNode = SelectStoreFp64(Node))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+
+ case MipsISD::ThreadPointer: {
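+    // Read the thread pointer with RDHWR from hardware register $29
+    // (UserLocal) and route the result through register $v1.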
+ unsigned SrcReg = Mips::HWR29;
+ unsigned DestReg = Mips::V1;
+ SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return ResNode.getNode();
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 000000000000..b4f4b1b4bf04
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,2371 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MipsISD::JmpLink: return "MipsISD::JmpLink";
+ case MipsISD::Hi: return "MipsISD::Hi";
+ case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GPRel: return "MipsISD::GPRel";
+ case MipsISD::TlsGd: return "MipsISD::TlsGd";
+ case MipsISD::TprelHi: return "MipsISD::TprelHi";
+ case MipsISD::TprelLo: return "MipsISD::TprelLo";
+ case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
+ case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp: return "MipsISD::FPCmp";
+ case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
+ case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
+ case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::MAdd: return "MipsISD::MAdd";
+ case MipsISD::MAddu: return "MipsISD::MAddu";
+ case MipsISD::MSub: return "MipsISD::MSub";
+ case MipsISD::MSubu: return "MipsISD::MSubu";
+ case MipsISD::DivRem: return "MipsISD::DivRem";
+ case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
+ case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ default: return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM)
+ : TargetLowering(TM, new MipsTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+ // Mips does not have i1 type, so use i32 for
+  // setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat())
+ if (!Subtarget->isFP64bit())
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+
+  // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MIPS doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with an AND/OR of the result,
+  // which we don't want, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+
+ if (!Subtarget->isMips32r2())
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (Subtarget->isSingleFloat())
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ if (!Subtarget->hasSwap())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+ setTargetDAGCombine(ISD::SDIVREM);
+ setTargetDAGCombine(ISD::UDIVREM);
+ setTargetDAGCombine(ISD::SETCC);
+
+ setMinFunctionAlignment(2);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+
+ setExceptionPointerRegister(Mips::A0);
+ setExceptionSelectorRegister(Mips::A1);
+}
+
+MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+  // If there exist users other than ADDENode or ADDCNode, this function
+  // returns false here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = ADDENode->getDebugLoc();
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ADDCNode->getOperand(1),// Lo0
+ ADDENode->getOperand(1));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MAdd);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+ if (!SDValue(ADDENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+ return true;
+}
+
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//   (subc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+  // SUBENode's second operand must be a flag output of a SUBC node in order
+ // for the matching to be successful.
+ SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+  // If there exist users other than SUBENode or SUBCNode, this function
+  // returns false here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = SUBENode->getDebugLoc();
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ SUBCNode->getOperand(0),// Lo0
+ SUBENode->getOperand(0));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MSub);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
+
+ if (!SDValue(SUBENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
+
+ return true;
+}
+
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
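+// Combines (S|U)DIVREM into a MipsISD::DivRem(U) node that writes the HI/LO
+// register pair, then reads the quotient from LO and the remainder from HI,
+// but only for the values that are actually used.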
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+ MipsISD::DivRemU;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ N->getOperand(0), N->getOperand(1));
+ SDValue InChain = DAG.getEntryNode();
+ SDValue InGlue = DivRem;
+
+ // insert MFLO
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+ InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // insert MFHI
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ Mips::HI, MVT::i32, InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+ }
+
+ return SDValue();
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_OEQ;
+ case ISD::SETUNE: return Mips::FCOND_UNE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_ONE;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+
+// Returns true if condition code has to be inverted.
+static bool InvertFPCondCode(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return false;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return true;
+
+ assert(false && "Illegal Condition Code");
+ return false;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+ // must be a SETCC node
+ if (Op.getOpcode() != ISD::SETCC)
+ return Op;
+
+ SDValue LHS = Op.getOperand(0);
+
+ if (!LHS.getValueType().isFloatingPoint())
+ return Op;
+
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+ // node if necessary.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+ SDValue False, DebugLoc DL) {
+ bool invert = InvertFPCondCode((Mips::CondCode)
+ cast<ConstantSDNode>(Cond.getOperand(2))
+ ->getSExtValue());
+
+ return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+ True.getValueType(), True, False, Cond);
+}
+
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return SDValue();
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+}
+
+SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned opc = N->getOpcode();
+
+ switch (opc) {
+ default: break;
+ case ISD::ADDE:
+ return PerformADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ }
+
+ return SDValue();
+}
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return Mips::BRANCH_T;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return Mips::BRANCH_F;
+
+ return Mips::BRANCH_INVALID;
+}
+
+static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
+ DebugLoc dl,
+ const MipsSubtarget* Subtarget,
+ const TargetInstrInfo *TII,
+ bool isFPCmp, unsigned Opc) {
+  // There is no need to expand CMov instructions if the target has
+ // conditional moves.
+ if (Subtarget->hasCondMov())
+ return BB;
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp)
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ else
+ BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ if (isFPCmp)
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+ else
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default:
+ assert(false && "Unexpected instr type to insert");
+ return NULL;
+ case Mips::MOVT:
+ case Mips::MOVT_S:
+ case Mips::MOVT_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1F);
+ case Mips::MOVF:
+ case Mips::MOVF_S:
+ case Mips::MOVF_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1T);
+ case Mips::MOVZ_I:
+ case Mips::MOVZ_S:
+ case Mips::MOVZ_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BNE);
+ case Mips::MOVN_I:
+ case Mips::MOVN_S:
+ case Mips::MOVN_D:
+ return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BEQ);
+
+ case Mips::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+
+ case Mips::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+
+ case Mips::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+
+ case Mips::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+
+ case Mips::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0, true);
+
+ case Mips::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+
+ case Mips::ATOMIC_SWAP_I8:
+ return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ case Mips::ATOMIC_SWAP_I16:
+ return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ case Mips::ATOMIC_SWAP_I32:
+ return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case Mips::ATOMIC_CMP_SWAP_I8:
+ return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ case Mips::ATOMIC_CMP_SWAP_I16:
+ return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ case Mips::ATOMIC_CMP_SWAP_I32:
+ return EmitAtomicCmpSwap(MI, BB, 4);
+ }
+}
+
+// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
+// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
+ // fallthrough --> loopMBB
+
+ // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before
+  // the loop and then loading it from the stack in block loopMBB is necessary
+  // to prevent the MachineLICM pass from hoisting the "or" instruction out of
+  // the block loopMBB.
+
+ int fi = 0;
+ if (BinOpcode == 0 && !Nand) {
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Incr).addFrameIndex(fi).addImm(0);
+ }
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ll oldval, 0(ptr)
+ // or dest, $0, oldval
+ // <binop> tmp1, oldval, incr
+ // sc tmp1, 0(ptr)
+ // beq tmp1, $0, loopMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
+ if (Nand) {
+ // and tmp2, oldval, incr
+ // nor tmp1, $0, tmp2
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ } else if (BinOpcode) {
+ // <binop> tmp1, oldval, incr
+ BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
+ } else {
+ // lw tmp2, fi(sp) // load incr from stack
+ // or tmp1, $zero, tmp2
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ }
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicBinaryPartial.");
+
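+  // Byte and halfword atomics are emulated on the aligned word that contains
+  // them: compute the word-aligned address, a shift amount and a mask for the
+  // subword, apply the operation to the shifted operand, and merge the result
+  // back under the mask before the SC.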
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Incr = MI->getOperand(2).getReg();
+
+ unsigned Addr = RegInfo.createVirtualRegister(RC);
+ unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned Newval = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // addiu tmp1,$0,-4 # 0xfffffffc
+ // and addr,ptr,tmp1
+ // andi tmp2,ptr,3
+ // sll shift,tmp2,3
+ // ori tmp3,$0,255 # 0xff
+ // sll mask,tmp3,shift
+ // nor mask2,$0,mask
+ // andi tmp4,incr,255
+ // sll incr2,tmp4,shift
+ // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
+
+ // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before
+  // the loop and then loading it from the stack in block loopMBB is necessary
+  // to prevent the MachineLICM pass from hoisting the "or" instruction out of
+  // the block loopMBB.
+
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ if (BinOpcode != Mips::SUBu) {
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
+ } else {
+ BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
+ }
+
+ int fi = 0;
+ if (BinOpcode == 0 && !Nand) {
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Incr2).addFrameIndex(fi).addImm(0);
+ }
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ll oldval,0(addr)
+ // binop tmp7,oldval,incr2
+ // and newval,tmp7,mask
+ // and tmp8,oldval,mask2
+ // or tmp9,tmp8,newval
+ // sc tmp9,0(addr)
+ // beq tmp9,$0,loopMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
+ if (Nand) {
+ // and tmp6, oldval, incr2
+ // nor tmp7, $0, tmp6
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ } else if (BinOpcode == Mips::SUBu) {
+ // addu tmp7, oldval, incr2
+ BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
+ } else if (BinOpcode) {
+ // <binop> tmp7, oldval, incr2
+ BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
+ } else {
+ // lw tmp6, fi(sp) // load incr2 from stack
+ // or tmp7, $zero, tmp6
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ }
+ BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // and tmp10,oldval,mask
+ // srl tmp11,tmp10,shift
+ // sll tmp12,tmp11,24
+ // sra dest,tmp12,24
+ BB = exitMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // The BuildMI calls below appear in reverse order because each one inserts
+  // at the beginning of exitMBB.
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+ .addReg(Tmp12).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
+ .addReg(Tmp11).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
+ .addReg(Tmp10).addReg(Shift);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
+ .addReg(Oldval).addReg(Mask);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Oldval = MI->getOperand(2).getReg();
+ unsigned Newval = MI->getOperand(3).getReg();
+
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Get or create a temporary stack location.
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ int fi = MipsFI->getAtomicFrameIndex();
+ if (fi == -1) {
+ fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+ MipsFI->setAtomicFrameIndex(fi);
+ }
+
+ // thisMBB:
+ // ...
+ // sw newval, fi(sp) // store newval to stack
+ // fallthrough --> loop1MBB
+
+ // Note: storing newval to stack before the loop and then loading it from
+  // the stack in block loop2MBB is necessary to prevent the MachineLICM pass
+  // from hoisting the "or" instruction out of the block loop2MBB.
+
+ BuildMI(BB, dl, TII->get(Mips::SW))
+ .addReg(Newval).addFrameIndex(fi).addImm(0);
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ll dest, 0(ptr)
+ // bne dest, oldval, exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(loop2MBB);
+
+ // loop2MBB:
+ // lw tmp2, fi(sp) // load newval from stack
+ // or tmp1, $0, tmp2
+ // sc tmp1, 0(ptr)
+ // beq tmp1, $0, loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ assert((Size == 1 || Size == 2) &&
+ "Unsupported size for EmitAtomicCmpSwapPartial.");
+
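+  // As with the partword binary atomics, the compare-and-swap is performed on
+  // the aligned word containing the byte or halfword: only the masked subword
+  // is compared and replaced.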
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Ptr = MI->getOperand(1).getReg();
+ unsigned Oldval = MI->getOperand(2).getReg();
+ unsigned Newval = MI->getOperand(3).getReg();
+
+ unsigned Addr = RegInfo.createVirtualRegister(RC);
+ unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned Mask = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
+ unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
+ unsigned Newval2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+
+ // insert new blocks after the current block
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = BB;
+ ++It;
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // addiu tmp1,$0,-4 # 0xfffffffc
+ // and addr,ptr,tmp1
+ // andi tmp2,ptr,3
+ // sll shift,tmp2,3
+ // ori tmp3,$0,255 # 0xff
+ // sll mask,tmp3,shift
+ // nor mask2,$0,mask
+ // andi tmp4,oldval,255
+ // sll oldval2,tmp4,shift
+ // andi tmp5,newval,255
+ // sll newval2,tmp5,shift
+ int64_t MaskImm = (Size == 1) ? 255 : 65535;
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ll oldval3,0(addr)
+ // and oldval4,oldval3,mask
+ // bne oldval4,oldval2,exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::BNE))
+ .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(loop2MBB);
+
+ // loop2MBB:
+ // and tmp6,oldval3,mask2
+ // or tmp7,tmp6,newval2
+ // sc tmp7,0(addr)
+ // beq tmp7,$0,loop1MBB
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
+ .addReg(Tmp7).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::BEQ))
+ .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // srl tmp8,oldval4,shift
+ // sll tmp9,tmp8,24
+ // sra dest,tmp9,24
+ BB = exitMBB;
+ int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // The BuildMI calls below appear in reverse order because each one inserts
+  // at the beginning of exitMBB.
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+ .addReg(Tmp9).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
+ .addReg(Tmp8).addImm(ShiftImm);
+ BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
+ .addReg(Oldval4).addReg(Shift);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
+ cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
+ "Cannot lower if the alignment of the allocated space is larger than \
+ that of the stack.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Get a reference to the Mips stack pointer
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
+ SDValue());
+
+ // This node always has two return values: a new stack pointer
+ // value and a chain
+ SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+ SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+ return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+
+ // Return if flag is not set by a floating point comparison.
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ Op.getDebugLoc());
+}
+
+SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+
+ MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
+
+ // %gp_rel relocation
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GPREL);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ }
+ // %hi/%lo relocation
+ SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_HI);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ }
+
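+  // In PIC mode the address is loaded from the GOT. Depending on the symbol's
+  // linkage, the %lo part of the address may still need to be added to the
+  // loaded value (see below).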
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOT);
+ GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA);
+ SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), GA, MachinePointerInfo(),
+ false, false, 0);
+  // For functions and for global targets that are not internally linked, only
+  // a load from the GOT/GP is necessary for PIC to work.
+ if (!GV->hasInternalLinkage() &&
+ (!GV->hasLocalLinkage() || isa<Function>(GV)))
+ return ResNode;
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+}
+
+SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ // %hi/%lo relocation
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_GOT);
+ BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Load = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // If the relocation model is PIC, use the General Dynamic TLS Model,
+ // otherwise use the Initial Exec or Local Exec TLS Model.
+ // TODO: implement Local Dynamic TLS model
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // General Dynamic TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
+ 0, MipsII::MO_TLSGD);
+ SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
+ SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
+ SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Args.push_back(Entry);
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(DAG.getEntryNode(),
+ (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false, true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+ dl);
+
+ return CallResult.first;
+ }
+
+ SDValue Offset;
+ if (GV->isDeclaration()) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOTTPREL);
+ Offset = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+
+ if (!IsPIC) {
+ SDValue Ops[] = { JTI };
+ HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ } else {// Emit Load from Global Pointer
+ JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = N->getConstVal();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ // gp_rel relocation
+ // FIXME: we should reference the constant pool using small data sections,
+ // but the asm printer currently doesn't support this feature without
+ // hacking it. This feature should come soon so we can uncomment the
+ // stuff below.
+ //if (IsInSmallSection(C->getType())) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HI);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ } else {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_GOT);
+ CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, MachinePointerInfo::getConstantPool(),
+ false, false, 0);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ }
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+}
+
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
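+ // copysign(x, y) on the i32 bit patterns: keep the magnitude bits of x
+ // (mask 0x7fffffff) and take the sign bit of y (mask 0x80000000).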
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+}
+
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+ // FIXME:
+ // Use ext/ins instructions if target architecture is Mips32r2.
+ // Eliminate redundant mfc1 and mtc1 instructions.
+ unsigned LoIdx = 0, HiIdx = 1;
+
+ if (!isLittle)
+ std::swap(LoIdx, HiIdx);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getConstant(LoIdx, MVT::i32));
+ SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
+ DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
+ DAG.getConstant(0x80000000, MVT::i32));
+ SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
+
+ if (!isLittle)
+ std::swap(Word0, Word1);
+
+ return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
+}
+
+SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT Ty = Op.getValueType();
+
+ assert(Ty == MVT::f32 || Ty == MVT::f64);
+
+ if (Ty == MVT::f32)
+ return LowerFCOPYSIGN32(Op, DAG);
+ else
+ return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+}
+
+SDValue MipsTargetLowering::
+LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ // check the depth
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
+ "Frame address can only be determined for current frame.");
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT);
+ return FrameAddr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+// not used, it must be shadowed. If only A3 is available, shadow it and
+// go to stack.
+//
+// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
+//===----------------------------------------------------------------------===//
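+//
+// For example, under these rules "double f(int a, double b, float c)" is
+// lowered as: a -> A0; b -> A2/A3 (A1 is shadowed); c, the 3rd argument,
+// would take an int reg, but A0-A3 are all used, so it goes to the stack.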
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const unsigned IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const unsigned F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+ // ByVal Args
+ if (ArgFlags.isByVal()) {
+ State.HandleByVal(ValNo, ValVT, LocVT, LocInfo,
+ 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags);
+ unsigned NextReg = (State.getNextStackOffset() + 3) / 4;
+ for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize);
+ r < std::min(IntRegsSize, NextReg); ++r)
+ State.AllocateReg(IntRegs[r]);
+ return false;
+ }
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ unsigned Reg;
+
+ // f32 and f64 are allocated in A0, A1, A2, A3 when any of the following
+ // is true: the function is vararg, the argument is the 3rd or higher, or
+ // a previous argument is not f32 or f64.
+ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1
+ || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo;
+ unsigned OrigAlign = ArgFlags.getOrigAlign();
+ bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ // If this is the first part of an i64 arg,
+ // the allocated register must be either A0 or A2.
+ if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3))
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
+ // Allocate int register and shadow next int register. If first
+ // available register is Mips::A1 or Mips::A3, shadow it too.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) {
+ // we are guaranteed to find an available float register
+ if (ValVT == MVT::f32) {
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ // Shadow int register
+ State.AllocateReg(IntRegs, IntRegsSize);
+ } else {
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ // Shadow int registers
+ unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg2 == Mips::A1 || Reg2 == Mips::A3)
+ State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ }
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
+
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, OrigAlign);
+
+ if (!Reg)
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static const unsigned O32IntRegsSize = 4;
+
+static const unsigned O32IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+};
+
+// Write ByVal Arg to arg registers and stack.
+static void
+WriteByValArg(SDValue& Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
+ SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ MVT PtrType) {
+ unsigned FirstWord = VA.getLocMemOffset() / 4;
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ unsigned LastWord = FirstWord + NumWords;
+ unsigned CurWord;
+
+ // copy the first 4 words of byval arg to registers A0 - A3
+ for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
+ ++CurWord) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant((CurWord - FirstWord) * 4,
+ MVT::i32));
+ SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(LoadVal.getValue(1));
+ unsigned DstReg = O32IntRegs[CurWord];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ }
+
+ // copy remaining part of byval arg to stack.
+ if (CurWord < LastWord) {
+ unsigned SizeInBytes = (LastWord - CurWord) * 4;
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant((CurWord - FirstWord) * 4,
+ MVT::i32));
+ LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(SizeInBytes, MVT::i32),
+ /*Align*/4,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
+ }
+}
+
+/// LowerCall - function arguments are copied from virtual regs to physical
+/// regs or the stack frame; CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isTailCall.
+SDValue
+MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // The MIPS target does not yet support tail call optimization.
+ isTailCall = false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset,
+ true));
+
+ // If this is the first call, create a stack frame object that points to
+ // a location to which .cprestore saves $gp.
+ if (IsPIC && !MipsFI->getGPFI())
+ MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
+
+ // Get the frame index of the stack frame object that points to the location
+ // of dynamically allocated area on the stack.
+ int DynAllocFI = MipsFI->getDynAllocFI();
+
+ // Update size of the maximum argument space.
+ // For O32, a minimum of four words (16 bytes) of argument space is
+ // allocated.
+ if (Subtarget->isABI_O32())
+ NextStackOffset = std::max(NextStackOffset, (unsigned)16);
+
+ unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
+
+ if (MaxCallFrameSize < NextStackOffset) {
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
+
+ // Set the offsets relative to $sp of the $gp restore slot and dynamically
+ // allocated stack space. These offsets must be aligned to a boundary
+ // determined by the stack alignment of the ABI.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+ StackAlignment * StackAlignment;
+
+ if (IsPIC)
+ MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+ MFI->setObjectOffset(DynAllocFI, NextStackOffset);
+ }
+
+ // With EABI it is possible to have 16 args in registers.
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = OutVals[i];
+ CCValAssign &VA = ArgLocs[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register are kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ // Register arguments can't reach this point...
+ assert(VA.isMemLoc());
+
+ // ByVal Arg.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
+ VA, Flags, getPointerTy());
+ continue;
+ }
+
+ // Create the frame index object for this incoming parameter
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+
+ // Emit an ISD::STORE which stores the
+ // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ // Extend range of indices of frame objects for outgoing arguments that were
+ // created during this function call. Skip this step if no such objects were
+ // created.
+ if (LastFI)
+ MipsFI->extendOutArgFIRange(FirstFI, LastFI);
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
+ bool LoadSymAddr = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, MipsII::MO_GOT);
+ CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+ 0, MipsII::MO_ABS_LO);
+ } else {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, OpFlag);
+ }
+
+ LoadSymAddr = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
+ LoadSymAddr = true;
+ }
+
+ SDValue InFlag;
+
+ // Create nodes that load address of callee and copy it to T9
+ if (IsPIC) {
+ if (LoadSymAddr) {
+ // Load callee address
+ Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ // Use GOT+LO if callee has internal linkage.
+ if (CalleeLo.getNode()) {
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ } else
+ Callee = LoadValue;
+ }
+
+ // copy to T9
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ InFlag = Chain.getValue(1);
+ Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NextStackOffset, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
+ std::vector<SDValue>& OutChains,
+ SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) {
+ unsigned LocMem = VA.getLocMemOffset();
+ unsigned FirstWord = LocMem / 4;
+
+ // copy register A0 - A3 to frame object
+ for (unsigned i = 0; i < NumWords; ++i) {
+ unsigned CurWord = FirstWord + i;
+ if (CurWord >= O32IntRegsSize)
+ break;
+
+ unsigned SrcReg = O32IntRegs[CurWord];
+ unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass);
+ SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
+ DAG.getConstant(i * 4, MVT::i32));
+ SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
+ StorePtr, MachinePointerInfo(), false,
+ false, 0);
+ OutChains.push_back(Store);
+ }
+}
+
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+SDValue
+MipsTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setVarArgsFrameIndex(0);
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
+ else
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
+
+ int LastFI = 0; // MipsFI->LastInArgFI is 0 at the entry of this function.
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ unsigned ArgReg = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = Mips::FGR32RegisterClass;
+ else if (RegVT == MVT::f64) {
+ if (!Subtarget->isSingleFloat())
+ RC = Mips::AFGR64RegisterClass;
+ } else
+ llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ if (Subtarget->isABI_O32()) {
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ VA.getLocReg()+1, RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ } else { // !VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ assert(Subtarget->isABI_O32() &&
+ "No support for ByVal args by ABIs other than O32 yet.");
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
+ true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(FIN);
+ ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags);
+
+ continue;
+ }
+
+ // The stack pointer offset is relative to the caller stack frame.
+ LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(LastFI),
+ false, false, 0));
+ }
+ }
+
+ // The MIPS ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ if (isVarArg && Subtarget->isABI_O32()) {
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ assert(NextStackOffset % 4 == 0 &&
+ "NextStackOffset must be aligned to 4-byte boundaries.");
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ MipsFI->setVarArgsFrameIndex(LastFI);
+
+ // If NextStackOffset is smaller than o32's 16-byte reserved argument area,
+ // copy the integer registers that have not been used for argument passing
+ // to the caller's stack frame.
+ for (; NextStackOffset < 16; NextStackOffset += 4) {
+ TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
+ unsigned Idx = NextStackOffset / 4;
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
+ LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ MipsFI->setLastInArgFI(LastFI);
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are
+ // stuck together, avoiding them being scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // The MIPS ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.getNode())
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // Mips-specific constraints
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
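+ //
+ // e.g. asm("mtc1 %1, %0" : "=f"(f) : "d"(i)) uses 'f' for the FP
+ // destination and 'd' for the integer source.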
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+ case 'y': // Same as 'r'. Exists for compatibility.
+ case 'r':
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, Mips::FGR32RegisterClass);
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
+
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ if (Imm.isNegZero())
+ return false;
+ return Imm.isZero();
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 000000000000..bda26a229e72
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,191 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // General Dynamic TLS
+ TlsGd,
+
+ // Local Exec TLS
+ TprelHi,
+ TprelLo,
+
+ // Thread Pointer
+ ThreadPointer,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Conditional Moves
+ CMovFP_T,
+ CMovFP_F,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret,
+
+ // MAdd/Sub nodes
+ MAdd,
+ MAddu,
+ MSub,
+ MSubu,
+
+ // DivRem(u)
+ DivRem,
+ DivRemU,
+
+ BuildPairF64,
+ ExtractElementF64,
+
+ WrapperPIC,
+
+ DynAlloc
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MipsTargetLowering : public TargetLowering {
+ public:
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ private:
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ // Lower Operand specifics
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode, bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand = false) const;
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *BB, unsigned Size) const;
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 000000000000..021c16799779
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,374 @@
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+ SDTCisInt<2>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, f64>,
+ SDTCisVT<2, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+ SDT_MipsExtractElementF64>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
+
+multiclass FFR1_1<bits<6> funct, string asmstr>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s\t$fd, $fs"), []>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>;
+}
+
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s\t$fd, $fs"),
+ [(set FGR32:$fd, (FOp FGR32:$fs))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d\t$fd, $fs"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+}
+
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+ RegisterClass RcDst, string asmstr>:
+ FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+ !strconcat(asmstr, "\t$fd, $fs"), []>;
+
+
+multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> {
+ let isCommutable = isComm in {
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+ (ins FGR32:$fs, FGR32:$ft),
+ !strconcat(asmstr, ".s\t$fd, $fs, $ft"),
+ [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+ (ins AFGR64:$fs, AFGR64:$ft),
+ !strconcat(asmstr, ".d\t$fd, $fs, $ft"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
+ Requires<[In32BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+
+let ft = 0 in {
+ defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
+ defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
+ defm ROUND_W : FFR1_1<0b001100, "round.w">;
+ defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
+ defm CVTW : FFR1_1<0b100100, "cvt.w">;
+
+ defm FABS : FFR1_2<0b000101, "abs", fabs>;
+ defm FNEG : FFR1_2<0b000111, "neg", fneg>;
+ defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
+
+ /// Convert to Single Precision
+ def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
+
+ let Predicates = [IsNotSingleFloat] in {
+ /// Ceil to long signed integer
+ def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
+ def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
+
+ /// Round to long signed integer
+ def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
+ def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
+
+ /// Floor to long signed integer
+ def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
+ def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
+
+ /// Trunc to long signed integer
+ def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
+ def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
+
+ /// Convert to long signed integer
+ def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+ def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+ /// Convert to Double Precision
+ def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+ def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+ def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
+ /// Convert to Single Precision
+ def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
+ def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
+ }
+}
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+ /// Move Control Registers From/To CPU Registers
+ def CFC1 : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+ "cfc1\t$rt, $fs", []>;
+
+ def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+ "ctc1\t$fs, $rt", []>;
+
+ def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+ "mfc1\t$rt, $fs", []>;
+
+ def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+ "mtc1\t$rt, $fs", []>;
+}
+
+def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ "mov.s\t$fd, $fs", []>;
+def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ "mov.d\t$fd, $fs", []>;
+
+/// Floating Point Memory Instructions
+let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
+ def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+ "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
+
+ def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+ "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+}
+
+// LWC1 and SWC1 can always be emitted with odd registers.
+def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr",
+ [(set FGR32:$ft, (load addr:$addr))]>;
+def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr),
+ "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>;
+
+/// Floating-point Arithmetic
+defm FADD : FFR1_4<0x10, "add", fadd, 1>;
+defm FDIV : FFR1_4<0x03, "div", fdiv>;
+defm FMUL : FFR1_4<0x02, "mul", fmul, 1>;
+defm FSUB : FFR1_4<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
+def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+ (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
+ [(MipsFPBrcond op, bb:$dst)]>;
+
+def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
+def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
+def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
+def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in synch.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let hasDelaySlot = 1, Defs=[FCR31] in {
+ def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+ "c.$cc.s\t$fs, $ft",
+ [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
+
+ def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+ "c.$cc.d\t$fs, $ft",
+ [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
+ Requires<[In32BitMode]>;
+}
+
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:float
+let usesCustomInserter = 1, Constraints = "$F = $dst" in
+class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
+ string instr_asm> :
+ FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
+
+def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
+def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
+
+let Predicates = [In32BitMode] in {
+ def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
+ def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
+}
+
+defm : MovzPats<FGR32, MOVZ_S>;
+defm : MovnPats<FGR32, MOVN_S>;
+
+let Predicates = [In32BitMode] in {
+ defm : MovzPats<AFGR64, MOVZ_D>;
+ defm : MovnPats<AFGR64, MOVN_D>;
+}
+
+let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in {
+// flag:float, data:int
+class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
+ FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
+
+// flag:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
+}
+
+def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
+def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+
+let Predicates = [In32BitMode] in {
+ def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+ MipsPseudo<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// if n is 0, lower part of src is extracted.
+// if n is 1, higher part of src is extracted.
+def ExtractElementF64 :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst,
+ (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+
+def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
+def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
+
+let Predicates = [In32BitMode] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+}
+
+// MipsFPRound is only emitted for MipsI targets.
+def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 000000000000..9f55fb38b959
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,227 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2-register instr) or src reg (on a 3-register instr).
+// rd - dst reg, only used on 3-register instrs.
+// shamt - only used on shift instructions, contains the shift amount.
+// funct - combined with the opcode field, gives the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
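+
+// For example, "addu $1, $2, $3" (op = 0, funct = 0x21) fills the FR fields
+// as rs = 2, rt = 3, rd = 1, shamt = 0, giving the encoding
+//   000000 00010 00011 00001 00000 100001 = 0x00430821
+// (the register numbers are chosen only for illustration).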
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
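+
+// For example, "addiu $1, $2, 4" (op = 0x09) fills the FI fields as rs = 2,
+// rt = 1, imm16 = 4, giving 001001 00010 00001 0000000000000100 = 0x24410004
+// (registers chosen only for illustration).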
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
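+
+// For example, "j 0x0040000c" (op = 0x02) stores bits 27-2 of the target
+// address in the addr field (0x100003), giving the encoding 0x08100003.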
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2-register instr) or src reg (on a 3-register instr).
+// fd - dst reg, only used on 3-register instrs.
+// fmt - double or single precision.
+// funct - combined with the opcode field, gives the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
+//===----------------------------------------------------------------------===//
+
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+ bits<5> fmt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
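+
+// For example, "add.s $f0, $f1, $f2" (op = 0x11, fmt = 16, funct = 0) fills
+// the FFR fields as ft = 2, fs = 1, fd = 0, giving the encoding 0x46020800
+// (registers chosen only for illustration).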
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
+//===----------------------------------------------------------------------===//
+
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cc;
+ bits<5> fmt;
+
+ let opcode = 0x11;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-4} = 0b11;
+ let Inst{3-0} = cc;
+}
+
+
+class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> N;
+ bits<1> tf;
+
+ let opcode = 0;
+ let tf = _tf;
+
+ let Inst{25-21} = rs;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<3> N;
+ bits<5> fmt;
+ bits<1> tf;
+
+ let opcode = 17;
+ let fmt = _fmt;
+ let tf = _tf;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 17;
+}
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 000000000000..0a7a7f2dfe4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,461 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
+ (MI->getOpcode() == Mips::LDC1)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored-to stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
+ (MI->getOpcode() == Mips::SDC1)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target nop
+/// instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
+ bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
+
+ // CPU-CPU is the most common.
+ if (DestCPU && SrcCPU) {
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Copy to CPU from other registers.
+ if (DestCPU) {
+ if (Mips::CCRRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SrcReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ else if (SrcReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
+ else
+ llvm_unreachable("Copy to CPU from invalid register");
+ return;
+ }
+
+ // Copy to other registers from CPU.
+ if (SrcCPU) {
+ if (Mips::CCRRegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MTHI))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MTLO))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Copy from CPU to invalid register");
+ return;
+ }
+
+ if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Cannot copy registers");
+}
+
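+// On MIPS I there is no 64-bit FP load/store, so spills and reloads of
+// AFGR64 registers are split into two 32-bit swc1/lwc1 accesses at offsets
+// 0 and 4 (see the isMips1() paths below).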
+void MipsInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::SDC1))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[0], getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[1], getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(4);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
+}
+
+void MipsInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
+ .addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
+ .addFrameIndex(FI).addImm(4);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
+}
+
+MachineInstr*
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static unsigned GetAnalyzableBrOpc(unsigned Opc) {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
+{
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
+ }
+}
+
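+// For example, for a terminator "bne $2, $3, $BB0_2" this fills Cond with
+// { BNE, $2, $3 } and sets BB to BB0_2 (the operands shown are illustrative).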
+static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+ int NumOp = Inst->getNumExplicitOperands();
+
+ // for both int and fp branches, the last explicit operand is the
+ // MBB.
+ BB = Inst->getOperand(NumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Opc));
+
+ for (int i=0; i<NumOp-1; i++)
+ Cond.push_back(Inst->getOperand(i));
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // If this block ends with no branches (it just falls through to its succ)
+ // just return false, leaving TBB/FBB null.
+ TBB = FBB = NULL;
+ return false;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return true;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return true;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == Mips::J) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return false;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return true;
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == Mips::J) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return true;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ return false;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != Mips::J)
+ return true;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return false;
+}
+
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond)
+ const {
+ unsigned Opc = Cond[0].getImm();
+ const MCInstrDesc &MCID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+ for (unsigned i = 1; i < Cond.size(); ++i)
+ MIB.addReg(Cond[i].getReg());
+
+ MIB.addMBB(TBB);
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // # of condition operands:
+ // Unconditional branches: 0
+ // Floating point branches: 1 (opc)
+ // Int BranchZero: 2 (opc, reg)
+ // Int Branch: 3 (opc, reg0, reg1)
+ assert((Cond.size() <= 3) &&
+ "# of Mips branch conditions must be <= 3!");
+
+ // Two-way Conditional branch.
+ if (FBB) {
+ BuildCondBr(MBB, TBB, DL, Cond);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ return 2;
+ }
+
+ // One way branch.
+ // Unconditional branch.
+ if (Cond.empty())
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ else // Conditional branch.
+ BuildCondBr(MBB, TBB, DL, Cond);
+ return 1;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+ MachineBasicBlock::reverse_iterator FirstBr;
+ unsigned removed;
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ FirstBr = I;
+
+ // Up to 2 branches are removed.
+ // Note that indirect branches are not removed.
+ for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ if (!GetAnalyzableBrOpc(I->getOpcode()))
+ break;
+
+ MBB.erase(I.base(), FirstBr.base());
+
+ return removed;
+}
+
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// specified Branch instruction.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() && Cond.size() <= 3) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+ unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Mips::GP);
+ RegInfo.addLiveIn(Mips::GP);
+
+ MipsFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 000000000000..4421c4862fa0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,160 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
+namespace llvm {
+
+namespace Mips {
+ /// GetOppositeBranchOpc - Return the inverse of the specified
+ /// opcode, e.g. turning BEQ to BNE.
+ unsigned GetOppositeBranchOpc(unsigned Opc);
+}
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address of the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+ // the module ID and TLS block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO
+ };
+}
+
+class MipsInstrInfo : public MipsGenInstrInfo {
+ MipsTargetMachine &TM;
+ const MipsRegisterInfo RI;
+public:
+ explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored-to stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+private:
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
+
+public:
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 000000000000..d1a058712459
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,881 @@
+//===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<2, 3>]>;
+def SDT_MipsDivRem : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>]>;
+
+def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// Hi and Lo nodes are used to handle global addresses. They are used in
+// MipsISelLowering to lower nodes such as GlobalAddress and ExternalSymbol
+// in the static model. (Nothing to do with the Mips HI and LO registers.)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// TlsGd node is used to handle General Dynamic TLS
+def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
+
+// TprelHi and TprelLo nodes are used to handle Local Exec TLS
+def MipsTprelHi : SDNode<"MipsISD::TprelHi", SDTIntUnaryOp>;
+def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
+
+// Thread pointer
+def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// MAdd*/MSub* nodes
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+
+// DivRem(u) nodes
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+
+// Target constant nodes that are not part of any isel patterns and remain
+// unchanged can cause instructions with illegal operands to be emitted.
+// Wrapper node patterns give the instruction selector a chance to replace
+// target constant nodes that would otherwise remain unchanged with ADDiu
+// nodes. Without these wrapper node patterns, the following conditional move
+// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// compiled:
+// movn %got(d)($gp), %got(c)($gp), $4
+// This instruction is illegal since movn can take only register operands.
+
+def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+ [SDNPHasChain, SDNPInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+def IsMips32 : Predicate<"Subtarget.isMips32()">;
+def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+def mem_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, slti
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are caught.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
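+
+// For example, addiu and slti take immediates in [-32768, 32767] via
+// immSExt16, while andi, ori and xori take immediates in [0, 65535] via
+// immZExt16.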
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// Mips Address Mode. The SDNode frameindex can possibly be a match,
+// since load and store instructions that access the stack use it.
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin, bit isComm = 0>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> {
+ let isCommutable = isComm;
+}
+
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
+ bit isComm = 0>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> {
+ let isCommutable = isComm;
+}
+
+// Arithmetic 2 register operands
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
+class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
+ FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
+ let isCommutable = isComm;
+}
+
+// Logical
+let isCommutable = 1 in
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+
+let isCommutable = 1 in
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+
+// Shifts
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
+
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$dst, $imm"),
+ [], IIAlu>;
+
+// Memory Load/Store
+let canFoldAsLoad = 1, hasDelaySlot = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI<op, (outs CPURegs:$dst), (ins mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+
+// Conditional Branch
+let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, "\t$a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ IIBranch>;
+
+class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
+ !strconcat(instr_asm, "\t$src, $offset"),
+ [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+ IIBranch>;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+ PatFrag cond_op>:
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+ IIAlu>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+ Operand Od, PatLeaf imm_type>:
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+ IIAlu>;
+
+// Unconditional branch
+let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1,
+ // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
+ class JumpLink<bits<6> op, string instr_asm>:
+ FJ<op, (outs), (ins calltarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
+ IIBranch>;
+
+ let rd=31 in
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+
+ class BranchLink<string instr_asm>:
+ FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
+}
+
+// Mul, Div
+let Defs = [HI, LO] in {
+ let isCommutable = 1 in
+ class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPURegs:$a, CPURegs:$b)], itin>;
+}
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+
+class EffectiveAddress<string instr_asm> :
+ FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
+ instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+// Sign Extend in Register.
+class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
+ FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+
+// Byte Swap
+class ByteSwap<bits<6> func, string instr_asm>:
+ FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+
+// Conditional Move
+class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+ [], NoItinerary>;
+
+// Read Hardware
+class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src),
+ "rdhwr\t$dst, $src", [], IIAlu> {
+ let rs = 0;
+ let shamt = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudo-instructions and automatic
+// assembler reordering, so we should reorder ourselves.
+def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
+def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
+def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+
+// These macros are inserted to prevent GAS from complaining
+// when using the AT register.
+def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
+def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
+
+let usesCustomInserter = 1 in {
+ def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_add_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_sub_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_AND_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_and_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_OR_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_or_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_xor_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_8\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_16\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ "atomic_load_nand_32\t$dst, $ptr, $incr",
+ [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+ def ATOMIC_SWAP_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_8\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
+ def ATOMIC_SWAP_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_16\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
+ def ATOMIC_SWAP_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+ "atomic_swap_32\t$dst, $ptr, $val",
+ [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
+ (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+ "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
+ [(set CPURegs:$dst,
+ (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+}
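+
+// The custom inserter expands these pseudos into ll/sc retry loops.  For the
+// 32-bit add case the expansion is roughly (register names and the label are
+// illustrative placeholders):
+//   $loop: ll    $tmp, 0($ptr)
+//          addu  $new, $tmp, $incr
+//          sc    $new, 0($ptr)
+//          beq   $new, $zero, $loop
+// $dst receives the value loaded by ll, i.e. the value before the update.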
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
+def ANDi : LogicI<0x0c, "andi", and>;
+def ORi : LogicI<0x0d, "ori", or>;
+def XORi : LogicI<0x0e, "xori", xor>;
+def LUi : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
+def ADD : ArithOverflowR<0x00, 0x20, "add", 1>;
+def SUB : ArithOverflowR<0x00, 0x22, "sub">;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def AND : LogicR<0x24, "and", and>;
+def OR : LogicR<0x25, "or", or>;
+def XOR : LogicR<0x26, "xor", xor>;
+def NOR : LogicNOR<0x00, 0x27, "nor">;
+
+/// Shift Instructions
+def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
+def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
+def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
+def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
+def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
+def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+
+// Rotate Instructions
+let Predicates = [IsMips32r2] in {
+ def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+}
+
+/// Load and Store Instructions
+def LB : LoadM<0x20, "lb", sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH : LoadM<0x21, "lh", sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW : LoadM<0x23, "lw", load>;
+def SB : StoreM<0x28, "sb", truncstorei8>;
+def SH : StoreM<0x29, "sh", truncstorei16>;
+def SW : StoreM<0x2b, "sw", store>;
+
+/// Load-linked, Store-conditional
+let hasDelaySlot = 1 in
+ def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
+ "ll\t$dst, $addr", [], IILoad>;
+let Constraints = "$src = $dst" in
+ def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
+ "sc\t$src, $addr", [], IIStore>;
+
+/// Jump and Branch Instructions
+def J : JumpFJ<0x02, "j">;
+def JR : JumpFR<0x00, 0x08, "jr">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ : CBranch<0x04, "beq", seteq>;
+def BNE : CBranch<0x05, "bne", setne>;
+
+let rt=1 in
+ def BGEZ : CBranchZero<0x01, "bgez", setge>;
+
+let rt=0 in {
+ def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
+ def BLEZ : CBranchZero<0x07, "blez", setle>;
+ def BLTZ : CBranchZero<0x01, "bltz", setlt>;
+}
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+ def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
+ "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+
+/// Multiply and Divide Instructions.
+def MULT : Mul<0x18, "mult", IIImul>;
+def MULTu : Mul<0x19, "multu", IIImul>;
+def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+
+let Defs = [HI] in
+ def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+ def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+ def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+ def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+let Predicates = [HasSEInReg] in {
+ let shamt = 0x10, rs = 0 in
+ def SEB : SignExtInReg<0x21, "seb", i8>;
+
+ let shamt = 0x18, rs = 0 in
+ def SEH : SignExtInReg<0x20, "seh", i16>;
+}
+
+/// Count Leading
+def CLZ : CountLeading<0b100000, "clz",
+ [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
+def CLO : CountLeading<0b100001, "clo",
+ [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
+
+/// Byte Swap
+let Predicates = [HasSwap] in {
+ let shamt = 0x3, rs = 0 in
+ def WSBW : ByteSwap<0x20, "wsbw">;
+}
+
+/// Conditional Move
+def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
+def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
+// conditional move instructions.
+// flag:int, data:int
+let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
+ class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$dst),
+ (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+
+def MOVZ_I : CondMovIntInt<0x0a, "movz">;
+def MOVN_I : CondMovIntInt<0x0b, "movn">;
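+
+// movz copies $T into $dst when $cond is zero and movn when $cond is
+// non-zero; otherwise $dst keeps the tied-in $F operand.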
+
+/// No operation
+let addr=0 in
+ def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands of load/store
+// instructions. The same does not happen for stack address copies, so an
+// add op with a mem ComplexPattern is used so that the stack address copy
+// can be matched. It's similar to Sparc LEA_ADDRi.
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// MADD*/MSUB*
+def MADD : MArithR<0, "madd", MipsMAdd, 1>;
+def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
+def MSUB : MArithR<4, "msub", MipsMSub>;
+def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+
+// MUL is an assembly macro in the currently used ISAs. In recent ISAs
+// it is a real instruction.
+def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>;
+
+def RDHWR : ReadHardware;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
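+
+// For example, materializing the constant 0x12345678 selects to
+//   lui $1, 0x1234
+//   ori $1, $1, 0x5678
+// (the register shown is illustrative).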
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : Pat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// tlsgd
+def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
+ (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
+
+// tprel hi/lo
+def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+
+// wrapper_pic
+class WrapperPICPat<SDNode node>:
+ Pat<(MipsWrapperPIC node:$in),
+ (ADDiu GP, node:$in)>;
+
+def : WrapperPICPat<tglobaladdr>;
+def : WrapperPICPat<tconstpool>;
+def : WrapperPICPat<texternalsym>;
+def : WrapperPICPat<tblockaddress>;
+def : WrapperPICPat<tjumptable>;
+
+// Mips does not have a "not" instruction, so we expand it using NOR with $zero.
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
+
+// Extended loads and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+
+// peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
+ (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
+ (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond CPURegs:$cond, bb:$dst),
+ (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+
+// select patterns
+multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
+ def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
+ def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
+ def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
+ def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+defm : MovzPats<CPURegs, MOVZ_I>;
+defm : MovnPats<CPURegs, MOVN_I>;
+
+// setcc patterns
+def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
+def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
+
+def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
+ (SLT CPURegs:$rhs, CPURegs:$lhs)>;
+def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
+
+def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
+
+def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
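The setcc patterns above compute a boolean in a register with SLT/SLTu and XOR tricks; for example, equality becomes "is (a xor b) unsigned-less-than 1". A small standalone C++ sketch, not LLVM code, that emulates a few of these lowerings and checks them against the plain comparisons:

#include <cassert>
#include <cstdint>

// Emulations of the setcc lowerings above (illustrative only).
static uint32_t slt (int32_t a, int32_t b)   { return a < b; }            // SLT
static uint32_t sltu(uint32_t a, uint32_t b) { return a < b; }            // SLTu

static uint32_t seteq(uint32_t a, uint32_t b) { return sltu(a ^ b, 1); }  // SLTu (XOR a,b), 1
static uint32_t setne(uint32_t a, uint32_t b) { return sltu(0, a ^ b); }  // SLTu ZERO, (XOR a,b)
static uint32_t setle(int32_t a, int32_t b)   { return slt(b, a) ^ 1; }   // XORi (SLT b,a), 1
static uint32_t setge(int32_t a, int32_t b)   { return slt(a, b) ^ 1; }   // XORi (SLT a,b), 1

int main() {
  assert(seteq(5, 5) == 1 && seteq(5, 6) == 0);
  assert(setne(5, 6) == 1 && setne(5, 5) == 0);
  assert(setle(-3, 7) == 1 && setle(8, 7) == 0);
  assert(setge(7, 7) == 1 && setge(6, 7) == 0);
  return 0;
}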
+
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 000000000000..f5cc3aa25f1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,118 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCInstLower.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
+ MipsAsmPrinter &asmprinter)
+ : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const {
+ MipsMCSymbolRefExpr::VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset = 0;
+
+ switch(MO.getTargetFlags()) {
+ default: assert(0 && "Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
+ case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ }
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset = MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
+ Ctx));
+}
+
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, MOTy);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
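For context, a sketch of how an asm printer would typically drive this lowering; the member names used here (MCInstLowering, OutStreamer) are assumptions based on the usual LLVM AsmPrinter pattern and are not taken from this patch:

// Hypothetical caller, shown only to illustrate the intended use of Lower().
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);    // assumed MipsMCInstLower member
  OutStreamer.EmitInstruction(TmpInst); // hand the lowered MCInst to the streamer
}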
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 000000000000..ec5201be7f6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class MipsAsmPrinter;
+
+/// MipsMCInstLower - This class is used to lower a MachineInstr into an
+/// MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext &Ctx;
+ Mangler *Mang;
+ MipsAsmPrinter &AsmPrinter;
+public:
+ MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
+ MipsAsmPrinter &asmprinter);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
new file mode 100644
index 000000000000..9a2bdae0e339
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -0,0 +1,63 @@
+//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mipsmcsymbolrefexpr"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+const MipsMCSymbolRefExpr*
+MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
+ int Offset, MCContext &Ctx) {
+ return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
+}
+
+void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_Mips_None: break;
+ case VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case VK_Mips_GOT: OS << "%got("; break;
+ case VK_Mips_ABS_HI: OS << "%hi("; break;
+ case VK_Mips_ABS_LO: OS << "%lo("; break;
+ case VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ }
+
+ OS << *Symbol;
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if (Kind != VK_Mips_None)
+ OS << ')';
+}
+
+bool
+MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
+ Asm->getOrCreateSymbolData(*Symbol);
+}
+
+const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
+ return Symbol->isDefined() ? &Symbol->getSection() : NULL;
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
new file mode 100644
index 000000000000..3e695963709e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -0,0 +1,62 @@
+//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCSYMBOLREFEXPR_H
+#define MIPSMCSYMBOLREFEXPR_H
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class MipsMCSymbolRefExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_Mips_None,
+ VK_Mips_GPREL,
+ VK_Mips_GOT_CALL,
+ VK_Mips_GOT,
+ VK_Mips_ABS_HI,
+ VK_Mips_ABS_LO,
+ VK_Mips_TLSGD,
+ VK_Mips_GOTTPREL,
+ VK_Mips_TPREL_HI,
+ VK_Mips_TPREL_LO
+ };
+
+private:
+ const VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset;
+
+ explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
+ int _Offset)
+ : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
+
+public:
+ static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
+ const MCSymbol *Symbol, int Offset,
+ MCContext &Ctx);
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const MipsMCSymbolRefExpr *) { return true; }
+
+ int getOffset() const { return Offset; }
+ void setOffset(int O) { Offset = O; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 000000000000..dbb7a6744224
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,114 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include <utility>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ // Range of frame object indices.
+ // InArgFIRange: Range of indices of all frame objects created during call to
+ // LowerFormalArguments.
+ // OutArgFIRange: Range of indices of all frame objects created during call to
+ // LowerCall except for the frame object for restoring $gp.
+ std::pair<int, int> InArgFIRange, OutArgFIRange;
+ int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
+ unsigned MaxCallFrameSize;
+
+ /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
+ /// intrinsics, it is necessary to use a temporary stack location.
+ /// This field holds the frame index of this location.
+ int AtomicFrameIndex;
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
+ OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+ MaxCallFrameSize(0), AtomicFrameIndex(-1)
+ {}
+
+ bool isInArgFI(int FI) const {
+ return FI <= InArgFIRange.first && FI >= InArgFIRange.second;
+ }
+ void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
+
+ bool isOutArgFI(int FI) const {
+ return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
+ }
+ void extendOutArgFIRange(int FirstFI, int LastFI) {
+ if (!OutArgFIRange.second)
+ // this must be the first time this function was called.
+ OutArgFIRange.first = FirstFI;
+ OutArgFIRange.second = LastFI;
+ }
+
+ int getGPFI() const { return GPFI; }
+ void setGPFI(int FI) { GPFI = FI; }
+ bool needGPSaveRestore() const { return getGPFI(); }
+ bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
+
+ // The first call to this function creates a frame object for the
+ // dynamically allocated stack area.
+ int getDynAllocFI() const {
+ if (!DynAllocFI)
+ DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+ return DynAllocFI;
+ }
+ bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
+ void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+ int getAtomicFrameIndex() const { return AtomicFrameIndex; }
+ void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 000000000000..24390daff75c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,278 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
+ case Mips::AT : case Mips::F1 : return 1;
+ case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
+ case Mips::V1 : case Mips::F3 : return 3;
+ case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
+ case Mips::A1 : case Mips::F5 : return 5;
+ case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
+ case Mips::A3 : case Mips::F7 : return 7;
+ case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
+ case Mips::T1 : case Mips::F9 : return 9;
+ case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
+ case Mips::T3 : case Mips::F11: return 11;
+ case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
+ case Mips::T5 : case Mips::F13: return 13;
+ case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
+ case Mips::T7 : case Mips::F15: return 15;
+ case Mips::S0 : case Mips::F16: case Mips::D8: return 16;
+ case Mips::S1 : case Mips::F17: return 17;
+ case Mips::S2 : case Mips::F18: case Mips::D9: return 18;
+ case Mips::S3 : case Mips::F19: return 19;
+ case Mips::S4 : case Mips::F20: case Mips::D10: return 20;
+ case Mips::S5 : case Mips::F21: return 21;
+ case Mips::S6 : case Mips::F22: case Mips::D11: return 22;
+ case Mips::S7 : case Mips::F23: return 23;
+ case Mips::T8 : case Mips::F24: case Mips::D12: return 24;
+ case Mips::T9 : case Mips::F25: return 25;
+ case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
+ case Mips::K1 : case Mips::F27: return 27;
+ case Mips::GP : case Mips::F28: case Mips::D14: return 28;
+ case Mips::SP : case Mips::F29: return 29;
+ case Mips::FP : case Mips::F30: case Mips::D15: return 30;
+ case Mips::RA : case Mips::F31: return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
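A brief usage note: this mapping collapses aliased FPU registers onto their encoding slot, so, for example (illustrative; the Mips::* enums come from the generated MipsGenRegisterInfo.inc):

unsigned SPNum = MipsRegisterInfo::getRegisterNumbering(Mips::SP); // 29
unsigned D5Num = MipsRegisterInfo::getRegisterNumbering(Mips::D5); // 10, same slot as F10/F11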
+
+unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Mips callee-save register range is $16-$23, $f20-$f30
+ static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+ Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
+ Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ static const unsigned Mips32CalleeSavedRegs[] = {
+ Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10,
+ Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
+ Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRegs;
+ else
+ return Mips32CalleeSavedRegs;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Mips::ZERO);
+ Reserved.set(Mips::AT);
+ Reserved.set(Mips::K0);
+ Reserved.set(Mips::K1);
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::SP);
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::RA);
+ Reserved.set(Mips::F31);
+ Reserved.set(Mips::D15);
+
+ // SVR4 requires that odd-numbered floating-point registers not be used.
+ if (!Subtarget.isSingleFloat() && !Subtarget.isMips32())
+ for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
+ Reserved.set(FReg);
+
+ return Reserved;
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex operands represent objects inside an abstract stack.
+// We must replace each FrameIndex with a direct stack/frame pointer
+// reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, a pointer to dynamically allocated
+ // stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+ MipsFI->isDynAllocFI(FrameIndex))
+ Offset = spOffset;
+ else
+ Offset = spOffset + stackSize;
+
+ Offset += MI.getOperand(i+1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
+ ((Offset & 0x8000) != 0);
+
+ // FIXME: change this when Mips goes MC.
+ BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
+ BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
+ .addReg(Mips::AT);
+ FrameReg = Mips::AT;
+ Offset = (short)(Offset & 0xffff);
+
+ BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ }
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
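When the final offset does not fit in the signed 16-bit immediate field, the code above rebuilds the address in $at: LUi loads ImmHi, ADDu adds the frame register, and the memory instruction keeps the sign-extended low half. The "+ ((Offset & 0x8000) != 0)" term compensates for that sign extension. A standalone C++ sketch of the split, with a check that it reassembles the original offset (illustrative only):

#include <cassert>
#include <cstdint>

// Mirrors the large-offset split above: the effective address is rebuilt as
//   FrameReg + (ImmHi << 16) + signext16(ImmLo)   (mod 2^32)
static void splitOffset(int32_t Offset, uint32_t &ImmHi, int16_t &ImmLo) {
  ImmHi = (((uint32_t)Offset & 0xffff0000u) >> 16) +
          ((Offset & 0x8000) != 0);      // carry fix for the signed low half
  ImmLo = (int16_t)(Offset & 0xffff);
}

static bool roundTrips(int32_t Offset) {
  uint32_t Hi; int16_t Lo;
  splitOffset(Offset, Hi, Lo);
  uint32_t Rebuilt = (Hi << 16) + (uint32_t)(int32_t)Lo; // wraps mod 2^32
  return Rebuilt == (uint32_t)Offset;
}

int main() {
  assert(roundTrips(0x12345678));
  assert(roundTrips(0x00018000));
  assert(roundTrips(-0x12345678));
  return 0;
}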
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+ return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MipsRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 000000000000..646369b5966f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,73 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const MipsSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 000000000000..f0db518b754b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,198 @@
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+class MipsRegWithSubRegs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+}
+class AFPR<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
+}
+
+// Mips Hardware Registers
+class HWR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+ def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+
+ /// Mips single-precision FPU registers
+ def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+
+ /// Mips double-precision FPU registers (aliased
+ /// with the single-precision registers to hold 64-bit values)
+ def D0 : AFPR< 0, "F0", [F0, F1]>;
+ def D1 : AFPR< 2, "F2", [F2, F3]>;
+ def D2 : AFPR< 4, "F4", [F4, F5]>;
+ def D3 : AFPR< 6, "F6", [F6, F7]>;
+ def D4 : AFPR< 8, "F8", [F8, F9]>;
+ def D5 : AFPR<10, "F10", [F10, F11]>;
+ def D6 : AFPR<12, "F12", [F12, F13]>;
+ def D7 : AFPR<14, "F14", [F14, F15]>;
+ def D8 : AFPR<16, "F16", [F16, F17]>;
+ def D9 : AFPR<18, "F18", [F18, F19]>;
+ def D10 : AFPR<20, "F20", [F20, F21]>;
+ def D11 : AFPR<22, "F22", [F22, F23]>;
+ def D12 : AFPR<24, "F24", [F24, F25]>;
+ def D13 : AFPR<26, "F26", [F26, F27]>;
+ def D14 : AFPR<28, "F28", [F28, F29]>;
+ def D15 : AFPR<30, "F30", [F30, F31]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+
+ // Hardware register $29
+ def HWR29 : Register<"29">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 64-bit registers, each aliasing an even/odd pair of
+// 32-bit registers (32-bit FP mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
+ // Return Values and Arguments
+ D0, D1, D6, D7,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5, D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14,
+ // Reserved
+ D15)> {
+ let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
+}
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
+
+// Hardware registers
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 000000000000..00be8ee94431
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chip sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..e4d70fcec591
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-selectiondag-info"
+#include "MipsTargetMachine.h"
+using namespace llvm;
+
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
new file mode 100644
index 000000000000..6cafb558b35a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Mips subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSELECTIONDAGINFO_H
+#define MIPSSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class MipsTargetMachine;
+
+class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ ~MipsSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 000000000000..6eee3333d584
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,62 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little) :
+ MipsGenSubtargetInfo(TT, CPU, FS),
+ MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
+ IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
+ HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
+ HasSwap(false), HasBitCount(false)
+{
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mips1";
+ MipsArchVersion = Mips1;
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Is the target system Linux?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+
+ // When only the target triple is specified and it is
+ // an Allegrex target, set the features. We also match
+ // big- and little-endian Allegrex cores (we don't really
+ // know whether a big-endian one exists).
+ if (TT.find("mipsallegrex") != std::string::npos ||
+ TT.find("psp") != std::string::npos) {
+ MipsABI = EABI;
+ IsSingleFloat = true;
+ MipsArchVersion = Mips2;
+ HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
+ HasSEInReg = true;
+ HasBitCount = true;
+ HasSwap = true;
+ HasCondMov = true;
+ }
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 000000000000..533d4afe073e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,128 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class MipsSubtarget : public MipsGenSubtargetInfo {
+
+public:
+ enum MipsABIEnum {
+ O32, O64, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ // IsSingleFloat - The target only supports single-precision floating-
+ // point operations. This enables the target to use all 32 32-bit
+ // floating-point registers instead of only the even-numbered ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+ // IsGP64bit - General-purpose registers are 64 bits wide.
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+ // IsLinux - Target system is Linux. If false, we assume a generic ELF OS for now.
+ bool IsLinux;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // instructions.
+ bool HasMulDivAdd;
+
+ // HasMinMax - MIN and MAX instructions.
+ bool HasMinMax;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isMips1() const { return MipsArchVersion == Mips1; }
+ bool isMips32() const { return MipsArchVersion >= Mips32; }
+ bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
+
+ bool isLittle() const { return IsLittle; }
+ bool isFP64bit() const { return IsFP64bit; }
+ bool isGP64bit() const { return IsGP64bit; }
+ bool isGP32bit() const { return !IsGP64bit; }
+ bool isSingleFloat() const { return IsSingleFloat; }
+ bool isNotSingleFloat() const { return !IsSingleFloat; }
+ bool hasVFPU() const { return HasVFPU; }
+ bool isLinux() const { return IsLinux; }
+
+ /// Features related to the presence of specific instructions.
+ bool hasSEInReg() const { return HasSEInReg; }
+ bool hasCondMov() const { return HasCondMov; }
+ bool hasMulDivAdd() const { return HasMulDivAdd; }
+ bool hasMinMax() const { return HasMinMax; }
+ bool hasSwap() const { return HasSwap; }
+ bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 000000000000..20b9f4ea3853
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,88 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMipsTarget() {
+ // Register the target.
+ RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment.
+// The stack is always 8-byte aligned.
+// In the function prologue, the stack frame is created by decrementing the
+// stack pointer. Once decremented, all references use positive offsets from
+// the stack/frame pointer; using StackGrowsUp makes this easier to handle.
+// Using CodeModel::Large enables different CALL behavior.
+MipsTargetMachine::
+MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool isLittle=false):
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this) {
+ // Abicall enables PIC by default
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isABI_O32())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+MipselTargetMachine::
+MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS) :
+ MipsTargetMachine(T, TT, CPU, FS, true) {}
+
+// Install an instruction selector pass that uses
+// the SelectionDAG to generate Mips code.
+bool MipsTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after these passes.
+bool MipsTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsEmitGPRestorePass(*this));
+ return true;
+}
+
+bool MipsTargetMachine::
+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsExpandPseudoPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 000000000000..a021af2ff16d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,82 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "MipsFrameLowering.h"
+#include "MipsSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ MipsFrameLowering FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ public:
+ MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool isLittle);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ };
+
+/// MipselTargetMachine - Mipsel target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+public:
+ MipselTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
new file mode 100644
index 000000000000..cf5d1b58addd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -0,0 +1,102 @@
+//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetObjectFile.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
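+// Example (illustrative): the threshold can be overridden on the llc command
+// line, e.g. "llc -march=mips -mips-ssection-threshold=16 foo.ll" to allow
+// objects of up to 16 bytes into the small sections.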
+
+void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An object must be placed in a small section if its size is less than or
+// equal to the small section size threshold. Data in these sections must be
+// addressed using the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= SSThreshold;
+}
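+// Example (illustrative): a global placed in .sdata/.sbss is accessed
+// gp-relative, roughly as
+//   lw $2, %gp_rel(counter)($gp)
+// (with "counter" a hypothetical small global), instead of the usual
+// %hi/%lo address materialization.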
+
+bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+
+  // Only use the small section for non-Linux targets.
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isLinux())
+ return false;
+
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+  // If this is an internal constant string, there is a special
+  // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+
+
+const MCSection *MipsTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
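+// Illustrative sketch of the classification above, assuming hypothetical
+// globals, the default 8-byte threshold and a non-Linux target:
+//   int counter;           // 4 bytes, zero-initialized -> .sbss
+//   int seed = 42;         // 4 bytes, initialized      -> .sdata
+//   int table[16] = {1};   // 64 bytes, over threshold  -> regular .data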
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
new file mode 100644
index 000000000000..c394a9dc02e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM, SectionKind Kind)const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+
+ // TODO: Classify globals as mips wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
new file mode 100644
index 000000000000..a8d6fe94b1ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+
+extern "C" void LLVMInitializeMipsTargetInfo() {
+ RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
+
+ RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
+}
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
new file mode 100644
index 000000000000..331266da30b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_TARGET_DEFINITIONS PTX.td)
+
+tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
+tablegen(PTXGenInstrNames.inc -gen-instr-enums)
+tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
+tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PTXGenRegisterNames.inc -gen-register-enums)
+tablegen(PTXGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PTXCodeGen
+ PTXAsmPrinter.cpp
+ PTXISelDAGToDAG.cpp
+ PTXISelLowering.cpp
+ PTXInstrInfo.cpp
+ PTXFrameLowering.cpp
+ PTXMCAsmInfo.cpp
+ PTXMCAsmStreamer.cpp
+ PTXMFInfoExtract.cpp
+ PTXRegisterInfo.cpp
+ PTXSubtarget.cpp
+ PTXTargetMachine.cpp
+ )
+
+add_subdirectory(TargetInfo)
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..df0f63fdba60
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPTXDesc
+ PTXMCTargetDesc.cpp
+ PTXMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..35f5a7b2e6ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
new file mode 100644
index 000000000000..efefead5341d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -0,0 +1,35 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::ptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasDotTypeDotSizeDirective = false;
+
+ HasSingleParameterDotFile = false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
new file mode 100644
index 000000000000..03f5d66b3d60
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct PTXMCAsmInfo : public MCAsmInfo {
+ explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 000000000000..23f70bd13787
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPTXMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+ createPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+ createPTXMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializePTXMCAsmInfo() {
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 000000000000..1003b0b5ece9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 000000000000..28cab2429c81
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,48 @@
+//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "MCTargetDesc/PTXMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PTXTargetMachine;
+ class FunctionPass;
+
+ namespace PTX {
+ enum StateSpace {
+ GLOBAL = 0, // default to global state space
+ CONSTANT = 1,
+ LOCAL = 2,
+ PARAMETER = 3,
+ SHARED = 4
+ };
+
+ enum Predicate {
+ PRED_NORMAL = 0,
+ PRED_NEGATE = 1
+ };
+ } // namespace PTX
+
+ FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // namespace llvm;
+
+#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 000000000000..f6fbe9fffc6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,135 @@
+//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
+ "Disable Fused-Multiply Add">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0">;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1">;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+ "Use PTX Language Version 2.2">;
+
+def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
+ "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+ "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+ "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+ "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+ "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+ "Use Shader Model 2.0">;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+ "Use Shader Model 2.1">;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+ "Use Shader Model 2.2">;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+ "Use Shader Model 2.3">;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+ "PTX_COMPUTE_1_0",
+ "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+ "PTX_COMPUTE_1_1",
+ "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+ "PTX_COMPUTE_1_2",
+ "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+ "PTX_COMPUTE_1_3",
+ "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+ "PTX_COMPUTE_2_0",
+ "Use Compute Compatibility 2.0">;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10", [FeatureSM10]>;
+def : Proc<"sm_11", [FeatureSM11]>;
+def : Proc<"sm_12", [FeatureSM12]>;
+def : Proc<"sm_13", [FeatureSM13]>;
+def : Proc<"sm_20", [FeatureSM20]>;
+def : Proc<"sm_21", [FeatureSM21]>;
+def : Proc<"sm_22", [FeatureSM22]>;
+def : Proc<"sm_23", [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80", [FeatureSM10]>;
+def : Proc<"gt200", [FeatureSM13]>;
+def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
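+// Example (illustrative): the processor and feature names above map directly
+// to llc's -mcpu and -mattr flags, e.g.
+//   llc -march=ptx32 -mcpu=sm_20 -mattr=+double,+ptx23 kernel.ll
+// where "ptx32" is assumed here to be the registered target name.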
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PTXCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+ let InstructionSet = PTXInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 000000000000..2848d5460eee
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,605 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ // autogen'd.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace
+
+static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
+
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+ TEST_REGCLS(RegPred, pred);
+ TEST_REGCLS(RegI16, b16);
+ TEST_REGCLS(RegI32, b32);
+ TEST_REGCLS(RegI64, b64);
+ TEST_REGCLS(RegF32, b32);
+ TEST_REGCLS(RegF64, b64);
+#undef TEST_REGCLS
+
+ llvm_unreachable("Not in any register class!");
+ return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+ switch (addressSpace) {
+ default: llvm_unreachable("Unknown state space");
+ case PTX::GLOBAL: return "global";
+ case PTX::CONSTANT: return "const";
+ case PTX::LOCAL: return "local";
+ case PTX::PARAMETER: return "param";
+ case PTX::SHARED: return "shared";
+ }
+ return NULL;
+}
+
+static const char *getTypeName(const Type* type) {
+ while (true) {
+ switch (type->getTypeID()) {
+ default: llvm_unreachable("Unknown type");
+ case Type::FloatTyID: return ".f32";
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
+ case Type::ArrayTyID:
+ case Type::PointerTyID:
+ type = dyn_cast<const SequentialType>(type)->getElementType();
+ break;
+ }
+ }
+ return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at the beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? ""
+ : ", map_f64_to_f32")));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ std::string addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ }
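+  // For example (illustrative), an sm_20 module built with PTX 2.3 support
+  // on a 64-bit subtarget starts with:
+  //   .version 2.3
+  //   .target sm_20
+  //   .address_size 64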
+
+ OutStreamer.AddBlankLine();
+
+ // Define any .file directives
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ GetOrCreateSourceID(FN, Dir);
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // declare global variables
+ for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+ i != e; ++i)
+ EmitVariableDeclaration(i);
+}
+
+bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ EmitFunctionDeclaration();
+ EmitFunctionBody();
+ return false;
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText(Twine("{"));
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ // Print local variable definition
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
+ unsigned reg = *i;
+
+ std::string def = "\t.reg .";
+ def += getRegisterTypeName(reg);
+ def += ' ';
+ def += getRegisterName(reg);
+ def += ';';
+ OutStreamer.EmitRawText(Twine(def));
+ }
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ std::string def = "\t.reg .b";
+ def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " s";
+ def += utostr(i);
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
+ }
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ std::string str;
+ str.reserve(64);
+
+ raw_string_ostream OS(str);
+
+ DebugLoc DL = MI->getDebugLoc();
+ if (!DL.isUnknown()) {
+
+ const MDNode *S = DL.getScope(MF->getFunction()->getContext());
+
+ // This is taken from DwarfDebug.cpp, which is conveniently not a public
+ // LLVM class.
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
+ 0, 0, 0, Fn);
+
+ const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
+
+ OS << "\t.loc ";
+ OS << utostr(MDL.getFileNum());
+ OS << " ";
+ OS << utostr(MDL.getLine());
+ OS << " ";
+ OS << utostr(MDL.getColumn());
+ OS << "\n";
+ }
+
+
+ // Emit predicate
+ printPredicateOperand(MI, OS);
+
+ // Write instruction to str
+ printInstruction(MI, OS);
+ OS << ';';
+ OS.flush();
+
+ StringRef strref = StringRef(str);
+ OutStreamer.EmitRawText(strref);
+}
+
+void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << (long) MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_Register:
+ OS << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+ // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+ if (isFloat) {
+ OS << "0F";
+ }
+ else {
+ OS << "0D";
+ }
+ // Emit the encoded floating-point value.
+ if (constFP.getZExtValue() > 0) {
+ OS << constFP.toString(16, false);
+ }
+ else {
+ OS << "00000000";
+      // If we have a double-precision zero, pad to 8 bytes.
+ if (!isFloat) {
+ OS << "00000000";
+ }
+ }
+ break;
+ }
+}
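+// Example (illustrative) of the encoding above: the float 1.0f bitcasts to
+// 0x3F800000 and prints as "0F3F800000", while the double 1.0 bitcasts to
+// 0x3FF0000000000000 and prints as "0D3FF0000000000000".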
+
+void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ printOperand(MI, opNum, OS);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ OS << "+";
+ printOperand(MI, opNum+1, OS);
+}
+
+void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(gv))
+ return;
+
+ MCSymbol *gvsym = Mang->getSymbol(gv);
+
+ assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+ std::string decl;
+
+ // check if it is defined in some other translation unit
+ if (gv->isDeclaration())
+ decl += ".extern ";
+
+ // state space: e.g., .global
+ decl += ".";
+ decl += getStateSpaceName(gv->getType()->getAddressSpace());
+ decl += " ";
+
+ // alignment (optional)
+ unsigned alignment = gv->getAlignment();
+ if (alignment != 0) {
+ decl += ".align ";
+ decl += utostr(Log2_32(gv->getAlignment()));
+ decl += " ";
+ }
+
+
+ if (PointerType::classof(gv->getType())) {
+ const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
+ const Type* elementTy = pointerTy->getElementType();
+
+ decl += ".b8 ";
+ decl += gvsym->getName();
+ decl += "[";
+
+ if (elementTy->isArrayTy())
+ {
+ assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
+
+ const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ unsigned numElements = arrayTy->getNumElements();
+
+ while (elementTy->isArrayTy()) {
+
+ arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ numElements *= arrayTy->getNumElements();
+ }
+
+ // FIXME: isPrimitiveType() == false for i16?
+ assert(elementTy->isSingleValueType() &&
+ "Non-primitive types are not handled");
+
+ // Compute the size of the array, in bytes.
+ uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
+ * numElements;
+
+ decl += utostr(arraySize);
+ }
+
+ decl += "]";
+
+ // handle string constants (assume ConstantArray means string)
+
+ if (gv->hasInitializer())
+ {
+ const Constant *C = gv->getInitializer();
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ {
+ decl += " = {";
+
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ {
+ if (i > 0) decl += ",";
+
+ decl += "0x" +
+ utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ }
+
+ decl += "}";
+ }
+ }
+ }
+ else {
+ // Note: this is currently the fall-through case and most likely generates
+ // incorrect code.
+ decl += getTypeName(gv->getType());
+ decl += " ";
+
+ decl += gvsym->getName();
+
+ if (ArrayType::classof(gv->getType()) ||
+ PointerType::classof(gv->getType()))
+ decl += "[]";
+ }
+
+ decl += ";";
+
+ OutStreamer.EmitRawText(Twine(decl));
+
+ OutStreamer.AddBlankLine();
+}
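+// Example (illustrative): for a hypothetical global in the default (global)
+// state space whose pointee type is [10 x i32], the code above emits roughly
+//   .global .b8 data[40];
+// i.e. arrays are flattened to a byte array covering the total size.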
+
+void PTXAsmPrinter::EmitFunctionDeclaration() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (!CurrentFnSym->isUndefined()) {
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+ return;
+ }
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const bool isKernel = MFI->isKernel();
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ std::string decl = isKernel ? ".entry" : ".func";
+
+ unsigned cnt = 0;
+
+ if (!isKernel) {
+ decl += " (";
+ for (PTXMachineFunctionInfo::ret_iterator
+ i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
+ }
+ decl += ")";
+ }
+
+ // Print function name
+ decl += " ";
+ decl += CurrentFnSym->getName().str();
+
+ decl += " (";
+
+ cnt = 0;
+
+ // Print parameters
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ decl += ".param .b";
+ decl += utostr(*i);
+ decl += " ";
+ decl += PARAM_PREFIX;
+ decl += utostr(++cnt);
+ } else {
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
+ }
+ }
+ decl += ")";
+
+ OutStreamer.EmitRawText(Twine(decl));
+}
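+// Example (illustrative): a hypothetical kernel taking two i32 arguments is
+// declared roughly as
+//   .entry my_kernel (.param .b32 __param_1, .param .b32 __param_2)
+// while a non-kernel device function additionally gets the leading
+// "(.reg ...)" return-register group built above.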
+
+void PTXAsmPrinter::
+printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ unsigned reg = MI->getOperand(i).getReg();
+ int predOp = MI->getOperand(i+1).getImm();
+
+ DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
+
+ if (reg != PTX::NoRegister) {
+ O << '@';
+ if (predOp == PTX::PRED_NEGATE)
+ O << '!';
+ O << getRegisterName(reg);
+ }
+}
+
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+  // MCStreamer expects the full path name as the filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
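+// Example (illustrative): the first time a hypothetical file "kernel.cu" is
+// seen this emits
+//   .file 1 "kernel.cu"
+// and EmitInstruction() above then prefixes instructions with matching
+//   .loc 1 <line> <column>
+// directives.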
+
+#include "PTXGenAsmWriter.inc"
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+ RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
new file mode 100644
index 000000000000..3e3ff4896621
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
@@ -0,0 +1,29 @@
+
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
+ CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
+ CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
+]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 000000000000..b621b9d634d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 000000000000..9320676150df
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,44 @@
+//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+ const PTXSubtarget &STI;
+
+public:
+ explicit PTXFrameLowering(const PTXSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9adfa624b29e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,182 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+ public:
+ PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+ virtual const char *getPassName() const {
+ return "PTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *Select(SDNode *Node);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces auto'gened from the target description
+#include "PTXGenDAGISel.inc"
+
+ private:
+    // We need this only because we can't match the instruction BRAdp
+    // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+ SDNode *SelectBRCOND(SDNode *Node);
+
+ bool isImm(const SDValue &operand);
+ bool SelectImm(const SDValue &operand, SDValue &imm);
+
+ const PTXSubtarget& getSubtarget() const;
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+ switch (Node->getOpcode()) {
+ case ISD::BRCOND:
+ return SelectBRCOND(Node);
+ default:
+ return SelectCode(Node);
+ }
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+ assert(Node->getNumOperands() >= 3);
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Pred = Node->getOperand(1);
+ SDValue Target = Node->getOperand(2); // branch target
+ SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(Target.getOpcode() == ISD::BasicBlock);
+ assert(Pred.getValueType() == MVT::i1);
+
+ // Emit BRAdp
+ SDValue Ops[] = { Target, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+ isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+ return false;
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() != ISD::ADD) {
+ // let SelectADDRii handle the [imm] case
+ if (isImm(Addr))
+ return false;
+ // it is [reg]
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ if (Addr.getNumOperands() < 2)
+ return false;
+
+ // let SelectADDRii handle the [imm+imm] case
+ if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+ return false;
+
+ // try [reg+imm] and [imm+reg]
+ for (int i = 0; i < 2; i ++)
+ if (SelectImm(Addr.getOperand(1-i), Offset)) {
+ Base = Addr.getOperand(i);
+ return true;
+ }
+
+ // neither [reg+imm] nor [imm+reg]
+ return false;
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // is [imm+imm]?
+ if (Addr.getOpcode() == ISD::ADD) {
+ return SelectImm(Addr.getOperand(0), Base) &&
+ SelectImm(Addr.getOperand(1), Offset);
+ }
+
+ // is [imm]?
+ if (SelectImm(Addr, Base)) {
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ return false;
+}
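+// Illustrative summary of the addressing forms handled above, following the
+// comments on each selector:
+//   [reg+reg]                   -> SelectADDRrr
+//   [reg], [reg+imm], [imm+reg] -> SelectADDRri
+//   [imm], [imm+imm]            -> SelectADDRii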
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+ return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+ SDNode *node = operand.getNode();
+ if (!ConstantSDNode::classof(node))
+ return false;
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(node);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+ operand.getValueType());
+ return true;
+}
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+ return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 000000000000..6fcf710e3f1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,347 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXISelLowering.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "PTXGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setMinFunctionAlignment(2);
+
+ ////////////////////////////////////
+ /////////// Expansion //////////////
+ ////////////////////////////////////
+
+ // (any/zero/sign) extload => load + (any/zero/sign) extend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // f32 extload => load + fextend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // f64 truncstore => trunc + store
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // sign_extend_inreg => sign_extend
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // br_cc => brcond
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // select_cc => setcc
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ ////////////////////////////////////
+ //////////// Legal /////////////////
+ ////////////////////////////////////
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ ////////////////////////////////////
+ //////////// Custom ////////////////
+ ////////////////////////////////////
+
+ // customise setcc to use bitwise logic if possible
+
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+
+ // customize translation of memory addresses
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+}
+
+MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown opcode");
+ case PTXISD::COPY_ADDRESS:
+ return "PTXISD::COPY_ADDRESS";
+ case PTXISD::LOAD_PARAM:
+ return "PTXISD::LOAD_PARAM";
+ case PTXISD::STORE_PARAM:
+ return "PTXISD::STORE_PARAM";
+ case PTXISD::EXIT:
+ return "PTXISD::EXIT";
+ case PTXISD::RET:
+ return "PTXISD::RET";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ }
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+ SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+ dl,
+ PtrVT.getSimpleVT(),
+ targetGlobal);
+
+ return movInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ break;
+ case CallingConv::PTX_Kernel:
+ MFI->setKernel(true);
+ break;
+ case CallingConv::PTX_Device:
+ MFI->setKernel(false);
+ break;
+ }
+
+ // We do one of two things here:
+ // IsKernel || SM >= 2.0 -> Use param space for arguments
+ // SM < 2.0 -> Use registers for arguments
+ if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+ // We just need to emit the proper LOAD_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+
+ assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+ "Kernels cannot take pred operands");
+
+ SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ DAG.getTargetConstant(i, MVT::i32));
+ InVals.push_back(ArgValue);
+
+ // Instead of storing a physical register in our argument list, we just
+ // store the total size of the parameter, in bits. The ASM printer
+ // knows how to process this.
+ MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+ }
+ }
+ else {
+ // For device functions, we use the PTX calling convention to do register
+ // assignments then create CopyFromReg ISDs for the allocated registers
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign& VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass* TRC = 0;
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ InVals.push_back(ArgValue);
+
+ MFI->addArgReg(VA.getLocReg());
+ }
+ }
+
+ return Chain;
+}
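+
+// To illustrate the two paths above: LOAD_PARAM nodes are matched by the LDpi*
+// definitions in PTXInstrInfo.td and turn into .param-space loads such as
+// "ld.param.u32\t$d, [$a]", with the bit sizes recorded via addArgReg feeding
+// the parameter list the ASM printer emits; the CopyFromReg path instead
+// records the physical registers assigned by CC_PTX, so those arguments are
+// expected to arrive directly in registers.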
+
+SDValue PTXTargetLowering::
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::PTX_Kernel:
+ assert(Outs.size() == 0 && "Kernel must return void.");
+ return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+ case CallingConv::PTX_Device:
+ //assert(Outs.size() <= 1 && "Can at most return one value.");
+ break;
+ }
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+ SDValue Flag;
+
+ // Even though we could use the .param space for return arguments for
+ // device functions if SM >= 2.0 and the number of return arguments is
+ // only 1, we just always use registers since this makes the codegen
+ // easier.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign& VA = RVLocs[i];
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ unsigned Reg = VA.getLocReg();
+
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+
+ // Glue the emitted copies together so that the CopyToReg nodes stay
+ // adjacent and are scheduled as a unit.
+ Flag = Chain.getValue(1);
+
+ MFI->addRetReg(Reg);
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
+}
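+
+// Summary of the return lowering: a PTX_Kernel function returns void and is
+// lowered to a single PTXISD::EXIT node, while a PTX_Device function copies
+// each return value into the registers chosen by RetCC_PTX, glues those
+// copies together, and ends with PTXISD::RET. The glue operand is attached to
+// the RET node only when at least one value was actually copied out.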
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 000000000000..43185416e1fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,70 @@
+//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class PTXSubtarget;
+class PTXTargetMachine;
+
+namespace PTXISD {
+ enum NodeType {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ LOAD_PARAM,
+ STORE_PARAM,
+ EXIT,
+ RET,
+ COPY_ADDRESS
+ };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+ public:
+ explicit PTXTargetLowering(TargetMachine &TM);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 000000000000..8cee351ee0df
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,24 @@
+//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Predicate operand, defaults to (0, 0) = (zero-reg, always).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
+ (ops (i1 zero_reg), (i32 0))>;
+
+let Namespace = "PTX" in {
+ class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+ : Instruction {
+ dag OutOperandList = oops;
+ dag InOperandList = !con(iops, (ins pred:$_p));
+ let AsmString = asmstr; // Predicate printing is defined elsewhere.
+ let Pattern = pattern;
+ let isPredicable = 1;
+ }
+}
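+
+// For reference: because InstPTX appends "pred:$_p" to every instruction's
+// inputs, each machine instruction carries a (predicate register, modifier)
+// pair. The default (zero_reg, 0) means the instruction is unpredicated; when
+// a real predicate register is assigned, it is printed as a PTX guard (an '@'
+// prefix on the instruction in the emitted assembly).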
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 000000000000..425265a2fdb7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,410 @@
+//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-instrinfo"
+
+#include "PTX.h"
+#include "PTXInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PTXGenInstrInfo.inc"
+
+using namespace llvm;
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+ : PTXGenInstrInfo(),
+ RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+ const TargetRegisterClass *cls;
+ const int opcode;
+} map[] = {
+ { &PTX::RegI16RegClass, PTX::MOVU16rr },
+ { &PTX::RegI32RegClass, PTX::MOVU32rr },
+ { &PTX::RegI64RegClass, PTX::MOVU64rr },
+ { &PTX::RegF32RegClass, PTX::MOVF32rr },
+ { &PTX::RegF64RegClass, PTX::MOVF64rr },
+ { &PTX::RegPredRegClass, PTX::MOVPREDrr }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const {
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
+ if (map[i].cls->contains(DstReg, SrcReg)) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPredicate(MI);
+ return;
+ }
+ }
+
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
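+
+// Example of the table-driven lookup above: a copy between two RegF32
+// registers selects MOVF32rr (printed as "mov.f32\t$d, $a" per
+// PTXInstrInfo.td), and AddDefaultPredicate appends the default, unpredicated
+// guard operands so the move is always executed.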
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ if (DstRC != SrcRC)
+ return false;
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (DstRC == map[i].cls) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
+ AddDefaultPredicate(MI);
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "Invalid register-register move instruction");
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+}
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int i = MI->findFirstPredOperandIdx();
+ return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ if (Pred.size() < 2)
+ llvm_unreachable("fewer than 2 predicate operands provided");
+
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ MI->getOperand(i).setReg(Pred[0].getReg());
+ MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+ return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ const MachineOperand &PredReg1 = Pred1[0];
+ const MachineOperand &PredReg2 = Pred2[0];
+ if (PredReg1.getReg() != PredReg2.getReg())
+ return false;
+
+ const MachineOperand &PredOp1 = Pred1[1];
+ const MachineOperand &PredOp2 = Pred2[1];
+ if (PredOp1.getImm() != PredOp2.getImm())
+ return false;
+
+ return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // If an instruction sets a predicate register, it defines a predicate.
+
+ // TODO: support the 5-operand format of the setp instruction
+
+ if (MI->getNumOperands() < 1)
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(0);
+
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
+ return false;
+
+ Pred.push_back(MO);
+ Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ return true;
+}
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // TODO implement cases when AllowModify is true
+
+ if (MBB.empty())
+ return true;
+
+ MachineBasicBlock::const_iterator iter = MBB.end();
+ const MachineInstr& instLast1 = *--iter;
+ const MCInstrDesc &desc1 = instLast1.getDesc();
+ // handle the special case where MBB contains only one instruction
+ const bool IsSizeOne = MBB.size() == 1;
+ // if IsSizeOne is true, *--iter would be invalid, so reuse instLast1/desc1
+ // as dummy values for instLast2/desc2, which are still referenced below
+ const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+ const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+
+ DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+ // this block ends with no branches
+ if (!IsAnyKindOfBranch(instLast1)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+ return false;
+ }
+
+ // this block ends with only an unconditional branch
+ if (desc1.isUnconditionalBranch() &&
+ // when IsSizeOne is true, short-circuiting skips the check of instLast2
+ (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+ TBB = GetBranchTarget(instLast1);
+ return false;
+ }
+
+ // this block ends with a conditional branch and
+ // it falls through to a successor block
+ if (desc1.isConditionalBranch() &&
+ IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+ TBB = GetBranchTarget(instLast1);
+ int i = instLast1.findFirstPredOperandIdx();
+ Cond.push_back(instLast1.getOperand(i));
+ Cond.push_back(instLast1.getOperand(i+1));
+ return false;
+ }
+
+ // when IsSizeOne is true, we are done
+ if (IsSizeOne)
+ return true;
+
+ // this block ends with a conditional branch
+ // followed by an unconditional branch
+ if (desc2.isConditionalBranch() &&
+ desc1.isUnconditionalBranch()) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+ TBB = GetBranchTarget(instLast2);
+ FBB = GetBranchTarget(instLast1);
+ int i = instLast2.findFirstPredOperandIdx();
+ Cond.push_back(instLast2.getOperand(i));
+ Cond.push_back(instLast2.getOperand(i+1));
+ return false;
+ }
+
+ // branch cannot be understood
+ DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+ return true;
+}
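+
+// For reference, the cases recognized above map onto the usual AnalyzeBranch
+// contract as follows:
+//   falls through (no branch)     -> TBB/FBB untouched, Cond empty
+//   unconditional branch only     -> TBB = target, Cond empty
+//   conditional branch + fall     -> TBB = target, Cond = (pred reg, pred imm)
+//   conditional + unconditional   -> TBB = cond target, FBB = uncond target
+// Anything else returns true, meaning the terminator sequence could not be
+// analyzed.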
+
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ unsigned count = 0;
+ while (!MBB.empty())
+ if (IsAnyKindOfBranch(MBB.back())) {
+ MBB.pop_back();
+ ++count;
+ } else
+ break;
+ DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+ return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: TBB: (NULL)\n");
+ DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: FBB: (NULL)\n");
+ DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+ assert(TBB && "TBB is NULL");
+
+ if (FBB) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 2;
+ } else if (Cond.size()) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 1;
+ }
+}
+
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKSTOREI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ // FIXME: this reuses the 32-bit stack-store opcode for 64-bit registers
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKSTOREF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKSTOREF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the store instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addFrameIndex(FrameIdx);
+ MIB.addReg(SrcReg);
+
+ AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKLOADI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ // FIXME: this reuses the 32-bit stack-load opcode for 64-bit registers
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKLOADF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKLOADF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the load instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addReg(DestReg);
+ MIB.addFrameIndex(FrameIdx);
+
+ AddDefaultPredicate(MIB);
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, Op2, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+ const MCInstrDesc &desc = inst.getDesc();
+ return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+ for (MachineBasicBlock::const_succ_iterator
+ i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+ if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+ return true;
+ return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+ // FIXME: So far all branch instructions put the destination in the first operand
+ const MachineOperand& target = inst.getOperand(0);
+ assert(target.isMBB() && "FIXME: detect branch target operand");
+ return target.getMBB();
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 000000000000..871f1ac8d376
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,133 @@
+//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
+class PTXInstrInfo : public PTXGenInstrInfo {
+private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // predicate support
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ // PTX is fully predicable
+ virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+ // branch support
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas fails to see
+ // through the "stack" register usage; it generates efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1);
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+
+ static void AddDefaultPredicate(MachineInstr *MI);
+
+ static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+ static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+ static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 000000000000..6bfe906d40ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,1102 @@
+//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Addressing
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+// Shader Model Support
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
+
+// PTX Version Support
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
+def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+
+// Fused-Multiply Add
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::CONSTANT;
+ return false;
+}]>;
+
+def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def store_local
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def store_parameter
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
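+
+// For reference: each fragment above keys on the address space of the
+// pointer's IR type, so the same generic load/store node is steered to a
+// space-qualified PTX instruction; e.g. a load from a pointer in the PTX
+// global space matches load_global and selects one of the "ld.global.*"
+// instructions defined below (see PTX_LD_ALL and "defm LDg").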
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Branch targets have OtherVT type; call targets are plain i32.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allows generic 3-register shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+ : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+ : SDNode<"PTXISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RegF32:$d, (opnode RegF32:$a))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins f32imm:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RegF32:$d, (opnode fpimm:$a))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RegF64:$d, (opnode RegF64:$a))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins f64imm:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RegF64:$d, (opnode fpimm:$a))]>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
+}
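+
+// To illustrate the expansion, a single line such as
+//   defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+// produces the four instructions FADDrr32, FADDri32, FADDrr64 and FADDri64,
+// whose assembly strings are "add.rn.f32\t$d, $a, $b" and
+// "add.rn.f64\t$d, $a, $b" for the register/register and register/immediate
+// forms of each width.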
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+ def rrr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
+ RegF32:$c))]>;
+ def rri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
+ fpimm:$c))]>;
+ def rrr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
+ RegF64:$c))]>;
+ def rri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
+ fpimm:$c))]>;
+}
+
+multiclass INT3<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, RegPred:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+multiclass INT3ntnc<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RegI16:$d),
+ (ins i16imm:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+ def ir32 : InstPTX<(outs RegI32:$d),
+ (ins i32imm:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+ def ir64 : InstPTX<(outs RegI64:$d),
+ (ins i64imm:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
+}
+
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ def ri
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+}
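+
+// To illustrate, an instantiation such as
+//   defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+// yields the basic SETPEQu32rr/SETPEQu32ri forms ("setp.eq.u32\t$p, $a, $b")
+// plus the combined forms (rr_and_r, ri_or_not_r, ...) that fold a following
+// and/or/xor with another predicate into a single setp using the
+// .and/.or/.xor qualifier.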
+
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+}
+
+multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+}
+
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
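+
+// As a further example, a single line such as
+//   defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+// (see the Loads section below) expands to a load for every register class
+// and addressing mode, e.g. LDgu32rr32 with assembly "ld.global.u32\t$d, [$a]"
+// for a register-register address on a 32-bit target.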
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : INT3<"add", add>;
+defm SUB : INT3<"sub", sub>;
+defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : INT3<"div", udiv>;
+defm REM : INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// Standard Unary Operations
+defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// SM_13+ defaults to .rn for f32 and f64,
+// SM10 must *not* provide a rounding mode.
+
+// TODO:
+// - Allow user selection of rounding modes for fdiv
+// - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+
+def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+
+
+
+// Multi-operation hybrid instructions
+
+// The selection of mad/fma is tricky. In some cases, they are the *same*
+// instruction, but in other cases we may prefer one or the other. Also,
+// different PTX versions differ on whether rounding mode flags are required.
+// In the short term, mad is supported on all PTX versions and we use a
+// default rounding mode no matter what shader model or PTX version.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
+ Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
+ Requires<[FMadNoRoundingMode, SupportsFMA]>;
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+def FSQRT32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "sqrt.rn.f32\t$d, $a",
+ [(set RegF32:$d, (fsqrt RegF32:$a))]>;
+
+def FSQRT64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "sqrt.rn.f64\t$d, $a",
+ [(set RegF64:$d, (fsqrt RegF64:$a))]>;
+
+def FSIN32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "sin.approx.f32\t$d, $a",
+ [(set RegF32:$d, (fsin RegF32:$a))]>;
+
+def FSIN64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "sin.approx.f64\t$d, $a",
+ [(set RegF64:$d, (fsin RegF64:$a))]>;
+
+def FCOS32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
+ "cos.approx.f32\t$d, $a",
+ [(set RegF32:$d, (fcos RegF32:$a))]>;
+
+def FCOS64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
+ "cos.approx.f64\t$d, $a",
+ [(set RegF64:$d, (fcos RegF64:$a))]>;
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// .setp
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : INT3ntnc<"shl.b", PTXshl>;
+defm SRL : INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+let neverHasSideEffects = 1 in {
+ def MOVPREDrr
+ : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
+ def MOVU16rr
+ : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
+ : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVPREDri
+ : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set RegPred:$d, imm:$a)]>;
+ def MOVU16ri
+ : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RegI16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, imm:$a)]>;
+ def MOVU64ri
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, imm:$a)]>;
+ def MOVF32ri
+ : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RegF32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RegF64:$d, fpimm:$a)]>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local", load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// These instructions are used to load/store from the .param space for
+// device and kernel parameters
+
+let hasSideEffects = 1 in {
+ def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
+ "ld.param.pred\t$d, [$a]",
+ [(set RegPred:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]",
+ [(set RegI16:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]",
+ [(set RegI32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]",
+ [(set RegI64:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]",
+ [(set RegF32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]",
+ [(set RegF64:$d, (PTXloadparam timm:$a))]>;
+
+ def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
+ "st.param.pred\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegPred:$a)]>;
+ def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
+ "st.param.u16\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI16:$a)]>;
+ def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
+ "st.param.u32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI32:$a)]>;
+ def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
+ "st.param.u64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI64:$a)]>;
+ def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
+ "st.param.f32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF32:$a)]>;
+ def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
+ "st.param.f64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF64:$a)]>;
+}
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local", store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
+
+// Conversion to pred
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
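+// For example, a trunc from i32 to the predicate type matches CVT_pred_u32
+// below and is printed as:  setp.gt.u32 %p, %r, 0;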
+def CVT_pred_u16
+ : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI16:$a))]>;
+
+def CVT_pred_u32
+ : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI32:$a))]>;
+
+def CVT_pred_u64
+ : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI64:$a))]>;
+
+def CVT_pred_f32
+ : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_pred_f64
+ : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u16
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
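+// For example, a zext from the predicate type to i16 matches CVT_u16_pred
+// below and is printed as:  selp.u16 %rh, 1, 0, %p;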
+def CVT_u16_preda
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (anyext RegPred:$a))]>;
+
+def CVT_u16_pred
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (zext RegPred:$a))]>;
+
+def CVT_u16_preds
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (sext RegPred:$a))]>;
+
+def CVT_u16_u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RegI16:$d, (trunc RegI32:$a))]>;
+
+def CVT_u16_u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RegI16:$d, (trunc RegI64:$a))]>;
+
+def CVT_u16_f32
+ : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u16_f64
+ : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u32
+
+def CVT_u32_pred
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (zext RegPred:$a))]>;
+
+def CVT_u32_b16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (anyext RegI16:$a))]>;
+
+def CVT_u32_u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (zext RegI16:$a))]>;
+
+def CVT_u32_preds
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+ [(set RegI32:$d, (sext RegI16:$a))]>;
+
+def CVT_u32_u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RegI32:$d, (trunc RegI64:$a))]>;
+
+def CVT_u32_f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u32_f64
+ : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (zext RegPred:$a))]>;
+
+def CVT_u64_preds
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (sext RegPred:$a))]>;
+
+def CVT_u64_u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RegI64:$d, (zext RegI16:$a))]>;
+
+def CVT_u64_s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+ [(set RegI64:$d, (sext RegI16:$a))]>;
+
+def CVT_u64_u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
+ [(set RegI64:$d, (zext RegI32:$a))]>;
+
+def CVT_u64_s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+ [(set RegI64:$d, (sext RegI32:$a))]>;
+
+def CVT_u64_f32
+ : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u64_f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to f32
+
+def CVT_f32_pred
+ : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+ "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
+ [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f32_u16
+ : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f32_u32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f32_u64
+ : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f32_f64
+ : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
+ [(set RegF32:$d, (fround RegF64:$a))]>;
+
+// Conversion to f64
+
+def CVT_f64_pred
+ : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+ "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
+ [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f64_u16
+ : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f64_u32
+ : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f64_u64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f64_f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RegF64:$d, (fextend RegF32:$a))]>;
+
+///===- Control Flow Instructions -----------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern is empty because it is not yet clear how to use the
+ // first operand of PredicateOperand (a RegPred register) here.
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
+
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
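+// The i32imm operand is the spill slot index; the 's' prefix in the asm string
+// prints it as a virtual stack register name (s0, s1, ...).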
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+ "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+ "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+ "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+ "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+ "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+ "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+ "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+ "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+ "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+ "mov.f64\t$d, s$a", []>;
+
+///===- Intrinsic Instructions --------------------------------------------===//
+
+include "PTXIntrinsicInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 000000000000..8d97909d339a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,84 @@
+//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI64:$d), (ins),
+ !strconcat("mov.u64\t$d, %", regname),
+ [(set RegI64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI32:$d), (ins),
+ !strconcat("mov.u32\t$d, %", regname),
+ [(set RegI32:$d, (intop))]>;
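+// For example, PTX_READ_TID_X below is printed as:  mov.u32 %r, %tid.x;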
+
+// TODO: Add reads of the vector versions of the special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
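+// For example, a bar.sync intrinsic call with immediate operand 0 is printed
+// as:  bar.sync 0;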
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 000000000000..b13a3dace130
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,541 @@
+//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+
+public:
+ PTXMCAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *printer,
+ MCCodeEmitter *emitter,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+
+ ~PTXMCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void InitSections() {}
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value);
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
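+// truncateToSize - Mask Value down to the low Bytes bytes, e.g.
+// truncateToSize(0x1234, 1) == 0x34.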
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ //assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+ Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
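+// PrintQuotedString - Emit Data as a double-quoted, escaped string; e.g. the
+// bytes {'a', '"', '\n'} are printed as "a\"\n".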
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
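+/// For example, with a '.zero' directive, EmitFill(8, 0, 0) emits ".zero 8".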
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
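+ // For a 16-byte request, for example, we print either 16 or Log2_32(16) == 4
+ // depending on MAI.getAlignmentIsInBytes().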
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+
+void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename){
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS);
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
+ return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, ShowInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 000000000000..6fe9e6c3f657
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,92 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must run after register allocation!
+
+namespace llvm {
+ /// PTXMFInfoExtract - PTX-specific pass that extracts machine function
+ /// information for use by PTXAsmPrinter.
+ ///
+ class PTXMFInfoExtract : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ public:
+ PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX Machine Function Info Extractor";
+ }
+ }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
+
+ DEBUG(dbgs()
+ << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+ << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+ DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+ reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg))
+ dbgs() << "Used Reg: " << reg << "\n";);
+
+ // FIXME: This is a slow linear scan over all target registers.
+ for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg) &&
+ !MFI->isRetReg(reg) &&
+ (MFI->isKernel() || !MFI->isArgReg(reg)))
+ MFI->addLocalVarReg(reg);
+
+ // Notify the MachineFunctionInfo that we are done adding local variable regs.
+ MFI->doneAddLocalVar();
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd();
+ i != e; ++i)
+ dbgs() << "Arg Reg: " << *i << "\n";);
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
+ i != e; ++i)
+ dbgs() << "Local Var Reg: " << *i << "\n";);
+
+ return false;
+}
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 000000000000..9d65f5bd1ade
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,91 @@
+//===- PTXMachineFunctionInfo.h - PTX machine function info ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+/// PTXMachineFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PTX target-specific information for each MachineFunction.
+///
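+/// Passes obtain it with MF.getInfo<PTXMachineFunctionInfo>(); see
+/// PTXMFInfoExtract for an example.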
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ bool is_kernel;
+ std::vector<unsigned> reg_arg, reg_local_var;
+ std::vector<unsigned> reg_ret;
+ bool _isDoneAddArg;
+
+public:
+ PTXMachineFunctionInfo(MachineFunction &MF)
+ : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
+ reg_arg.reserve(8);
+ reg_local_var.reserve(32);
+ }
+
+ void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
+
+ void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
+ void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
+ void addRetReg(unsigned reg) {
+ if (!isRetReg(reg)) {
+ reg_ret.push_back(reg);
+ }
+ }
+
+ void doneAddArg(void) {
+ _isDoneAddArg = true;
+ }
+ void doneAddLocalVar(void) {}
+
+ bool isKernel() const { return is_kernel; }
+
+ typedef std::vector<unsigned>::const_iterator reg_iterator;
+ typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
+ typedef std::vector<unsigned>::const_iterator ret_iterator;
+
+ bool argRegEmpty() const { return reg_arg.empty(); }
+ int getNumArg() const { return reg_arg.size(); }
+ reg_iterator argRegBegin() const { return reg_arg.begin(); }
+ reg_iterator argRegEnd() const { return reg_arg.end(); }
+ reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
+ reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
+
+ bool localVarRegEmpty() const { return reg_local_var.empty(); }
+ reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
+ reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
+
+ bool retRegEmpty() const { return reg_ret.empty(); }
+ int getNumRet() const { return reg_ret.size(); }
+ ret_iterator retRegBegin() const { return reg_ret.begin(); }
+ ret_iterator retRegEnd() const { return reg_ret.end(); }
+
+ bool isArgReg(unsigned reg) const {
+ return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
+ }
+
+ bool isRetReg(unsigned reg) const {
+ return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ }
+
+ bool isLocalVarReg(unsigned reg) const {
+ return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
+ != reg_local_var.end();
+ }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 000000000000..cb56ea98a2b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,51 @@
+//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII)
+ : PTXGenRegisterInfo() {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ unsigned Index;
+ MachineInstr& MI = *II;
+
+ Index = 0;
+ while (!MI.getOperand(Index).isFI()) {
+ ++Index;
+ assert(Index < MI.getNumOperands() &&
+ "Instr does not have a FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(Index).getIndex();
+
+ DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+ DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+ DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+ // By this point the frame index already reflects any stack slot re-use
+ // assignments, so rewrite the operand to the final slot number.
+ MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 000000000000..0b63cb6d458e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,65 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+ PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII);
+
+ virtual const unsigned
+ *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs; // save nothing
+ }
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved; // reserve no regs
+ }
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+ llvm_unreachable("PTX does not have a frame register");
+ return 0;
+ }
+
+ virtual unsigned getRARegister() const {
+ llvm_unreachable("PTX does not have a return address register");
+ return 0;
+ }
+
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+ }
+ virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
+ }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 000000000000..1313d248325e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,555 @@
+
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+///===- Predicate Registers -----------------------------------------------===//
+
+def P0 : PTXReg<"p0">;
+def P1 : PTXReg<"p1">;
+def P2 : PTXReg<"p2">;
+def P3 : PTXReg<"p3">;
+def P4 : PTXReg<"p4">;
+def P5 : PTXReg<"p5">;
+def P6 : PTXReg<"p6">;
+def P7 : PTXReg<"p7">;
+def P8 : PTXReg<"p8">;
+def P9 : PTXReg<"p9">;
+def P10 : PTXReg<"p10">;
+def P11 : PTXReg<"p11">;
+def P12 : PTXReg<"p12">;
+def P13 : PTXReg<"p13">;
+def P14 : PTXReg<"p14">;
+def P15 : PTXReg<"p15">;
+def P16 : PTXReg<"p16">;
+def P17 : PTXReg<"p17">;
+def P18 : PTXReg<"p18">;
+def P19 : PTXReg<"p19">;
+def P20 : PTXReg<"p20">;
+def P21 : PTXReg<"p21">;
+def P22 : PTXReg<"p22">;
+def P23 : PTXReg<"p23">;
+def P24 : PTXReg<"p24">;
+def P25 : PTXReg<"p25">;
+def P26 : PTXReg<"p26">;
+def P27 : PTXReg<"p27">;
+def P28 : PTXReg<"p28">;
+def P29 : PTXReg<"p29">;
+def P30 : PTXReg<"p30">;
+def P31 : PTXReg<"p31">;
+def P32 : PTXReg<"p32">;
+def P33 : PTXReg<"p33">;
+def P34 : PTXReg<"p34">;
+def P35 : PTXReg<"p35">;
+def P36 : PTXReg<"p36">;
+def P37 : PTXReg<"p37">;
+def P38 : PTXReg<"p38">;
+def P39 : PTXReg<"p39">;
+def P40 : PTXReg<"p40">;
+def P41 : PTXReg<"p41">;
+def P42 : PTXReg<"p42">;
+def P43 : PTXReg<"p43">;
+def P44 : PTXReg<"p44">;
+def P45 : PTXReg<"p45">;
+def P46 : PTXReg<"p46">;
+def P47 : PTXReg<"p47">;
+def P48 : PTXReg<"p48">;
+def P49 : PTXReg<"p49">;
+def P50 : PTXReg<"p50">;
+def P51 : PTXReg<"p51">;
+def P52 : PTXReg<"p52">;
+def P53 : PTXReg<"p53">;
+def P54 : PTXReg<"p54">;
+def P55 : PTXReg<"p55">;
+def P56 : PTXReg<"p56">;
+def P57 : PTXReg<"p57">;
+def P58 : PTXReg<"p58">;
+def P59 : PTXReg<"p59">;
+def P60 : PTXReg<"p60">;
+def P61 : PTXReg<"p61">;
+def P62 : PTXReg<"p62">;
+def P63 : PTXReg<"p63">;
+def P64 : PTXReg<"p64">;
+def P65 : PTXReg<"p65">;
+def P66 : PTXReg<"p66">;
+def P67 : PTXReg<"p67">;
+def P68 : PTXReg<"p68">;
+def P69 : PTXReg<"p69">;
+def P70 : PTXReg<"p70">;
+def P71 : PTXReg<"p71">;
+def P72 : PTXReg<"p72">;
+def P73 : PTXReg<"p73">;
+def P74 : PTXReg<"p74">;
+def P75 : PTXReg<"p75">;
+def P76 : PTXReg<"p76">;
+def P77 : PTXReg<"p77">;
+def P78 : PTXReg<"p78">;
+def P79 : PTXReg<"p79">;
+def P80 : PTXReg<"p80">;
+def P81 : PTXReg<"p81">;
+def P82 : PTXReg<"p82">;
+def P83 : PTXReg<"p83">;
+def P84 : PTXReg<"p84">;
+def P85 : PTXReg<"p85">;
+def P86 : PTXReg<"p86">;
+def P87 : PTXReg<"p87">;
+def P88 : PTXReg<"p88">;
+def P89 : PTXReg<"p89">;
+def P90 : PTXReg<"p90">;
+def P91 : PTXReg<"p91">;
+def P92 : PTXReg<"p92">;
+def P93 : PTXReg<"p93">;
+def P94 : PTXReg<"p94">;
+def P95 : PTXReg<"p95">;
+def P96 : PTXReg<"p96">;
+def P97 : PTXReg<"p97">;
+def P98 : PTXReg<"p98">;
+def P99 : PTXReg<"p99">;
+def P100 : PTXReg<"p100">;
+def P101 : PTXReg<"p101">;
+def P102 : PTXReg<"p102">;
+def P103 : PTXReg<"p103">;
+def P104 : PTXReg<"p104">;
+def P105 : PTXReg<"p105">;
+def P106 : PTXReg<"p106">;
+def P107 : PTXReg<"p107">;
+def P108 : PTXReg<"p108">;
+def P109 : PTXReg<"p109">;
+def P110 : PTXReg<"p110">;
+def P111 : PTXReg<"p111">;
+def P112 : PTXReg<"p112">;
+def P113 : PTXReg<"p113">;
+def P114 : PTXReg<"p114">;
+def P115 : PTXReg<"p115">;
+def P116 : PTXReg<"p116">;
+def P117 : PTXReg<"p117">;
+def P118 : PTXReg<"p118">;
+def P119 : PTXReg<"p119">;
+def P120 : PTXReg<"p120">;
+def P121 : PTXReg<"p121">;
+def P122 : PTXReg<"p122">;
+def P123 : PTXReg<"p123">;
+def P124 : PTXReg<"p124">;
+def P125 : PTXReg<"p125">;
+def P126 : PTXReg<"p126">;
+def P127 : PTXReg<"p127">;
+
+///===- 16-Bit Registers --------------------------------------------------===//
+
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+def RH32 : PTXReg<"rh32">;
+def RH33 : PTXReg<"rh33">;
+def RH34 : PTXReg<"rh34">;
+def RH35 : PTXReg<"rh35">;
+def RH36 : PTXReg<"rh36">;
+def RH37 : PTXReg<"rh37">;
+def RH38 : PTXReg<"rh38">;
+def RH39 : PTXReg<"rh39">;
+def RH40 : PTXReg<"rh40">;
+def RH41 : PTXReg<"rh41">;
+def RH42 : PTXReg<"rh42">;
+def RH43 : PTXReg<"rh43">;
+def RH44 : PTXReg<"rh44">;
+def RH45 : PTXReg<"rh45">;
+def RH46 : PTXReg<"rh46">;
+def RH47 : PTXReg<"rh47">;
+def RH48 : PTXReg<"rh48">;
+def RH49 : PTXReg<"rh49">;
+def RH50 : PTXReg<"rh50">;
+def RH51 : PTXReg<"rh51">;
+def RH52 : PTXReg<"rh52">;
+def RH53 : PTXReg<"rh53">;
+def RH54 : PTXReg<"rh54">;
+def RH55 : PTXReg<"rh55">;
+def RH56 : PTXReg<"rh56">;
+def RH57 : PTXReg<"rh57">;
+def RH58 : PTXReg<"rh58">;
+def RH59 : PTXReg<"rh59">;
+def RH60 : PTXReg<"rh60">;
+def RH61 : PTXReg<"rh61">;
+def RH62 : PTXReg<"rh62">;
+def RH63 : PTXReg<"rh63">;
+def RH64 : PTXReg<"rh64">;
+def RH65 : PTXReg<"rh65">;
+def RH66 : PTXReg<"rh66">;
+def RH67 : PTXReg<"rh67">;
+def RH68 : PTXReg<"rh68">;
+def RH69 : PTXReg<"rh69">;
+def RH70 : PTXReg<"rh70">;
+def RH71 : PTXReg<"rh71">;
+def RH72 : PTXReg<"rh72">;
+def RH73 : PTXReg<"rh73">;
+def RH74 : PTXReg<"rh74">;
+def RH75 : PTXReg<"rh75">;
+def RH76 : PTXReg<"rh76">;
+def RH77 : PTXReg<"rh77">;
+def RH78 : PTXReg<"rh78">;
+def RH79 : PTXReg<"rh79">;
+def RH80 : PTXReg<"rh80">;
+def RH81 : PTXReg<"rh81">;
+def RH82 : PTXReg<"rh82">;
+def RH83 : PTXReg<"rh83">;
+def RH84 : PTXReg<"rh84">;
+def RH85 : PTXReg<"rh85">;
+def RH86 : PTXReg<"rh86">;
+def RH87 : PTXReg<"rh87">;
+def RH88 : PTXReg<"rh88">;
+def RH89 : PTXReg<"rh89">;
+def RH90 : PTXReg<"rh90">;
+def RH91 : PTXReg<"rh91">;
+def RH92 : PTXReg<"rh92">;
+def RH93 : PTXReg<"rh93">;
+def RH94 : PTXReg<"rh94">;
+def RH95 : PTXReg<"rh95">;
+def RH96 : PTXReg<"rh96">;
+def RH97 : PTXReg<"rh97">;
+def RH98 : PTXReg<"rh98">;
+def RH99 : PTXReg<"rh99">;
+def RH100 : PTXReg<"rh100">;
+def RH101 : PTXReg<"rh101">;
+def RH102 : PTXReg<"rh102">;
+def RH103 : PTXReg<"rh103">;
+def RH104 : PTXReg<"rh104">;
+def RH105 : PTXReg<"rh105">;
+def RH106 : PTXReg<"rh106">;
+def RH107 : PTXReg<"rh107">;
+def RH108 : PTXReg<"rh108">;
+def RH109 : PTXReg<"rh109">;
+def RH110 : PTXReg<"rh110">;
+def RH111 : PTXReg<"rh111">;
+def RH112 : PTXReg<"rh112">;
+def RH113 : PTXReg<"rh113">;
+def RH114 : PTXReg<"rh114">;
+def RH115 : PTXReg<"rh115">;
+def RH116 : PTXReg<"rh116">;
+def RH117 : PTXReg<"rh117">;
+def RH118 : PTXReg<"rh118">;
+def RH119 : PTXReg<"rh119">;
+def RH120 : PTXReg<"rh120">;
+def RH121 : PTXReg<"rh121">;
+def RH122 : PTXReg<"rh122">;
+def RH123 : PTXReg<"rh123">;
+def RH124 : PTXReg<"rh124">;
+def RH125 : PTXReg<"rh125">;
+def RH126 : PTXReg<"rh126">;
+def RH127 : PTXReg<"rh127">;
+
+///===- 32-Bit Registers --------------------------------------------------===//
+
+def R0 : PTXReg<"r0">;
+def R1 : PTXReg<"r1">;
+def R2 : PTXReg<"r2">;
+def R3 : PTXReg<"r3">;
+def R4 : PTXReg<"r4">;
+def R5 : PTXReg<"r5">;
+def R6 : PTXReg<"r6">;
+def R7 : PTXReg<"r7">;
+def R8 : PTXReg<"r8">;
+def R9 : PTXReg<"r9">;
+def R10 : PTXReg<"r10">;
+def R11 : PTXReg<"r11">;
+def R12 : PTXReg<"r12">;
+def R13 : PTXReg<"r13">;
+def R14 : PTXReg<"r14">;
+def R15 : PTXReg<"r15">;
+def R16 : PTXReg<"r16">;
+def R17 : PTXReg<"r17">;
+def R18 : PTXReg<"r18">;
+def R19 : PTXReg<"r19">;
+def R20 : PTXReg<"r20">;
+def R21 : PTXReg<"r21">;
+def R22 : PTXReg<"r22">;
+def R23 : PTXReg<"r23">;
+def R24 : PTXReg<"r24">;
+def R25 : PTXReg<"r25">;
+def R26 : PTXReg<"r26">;
+def R27 : PTXReg<"r27">;
+def R28 : PTXReg<"r28">;
+def R29 : PTXReg<"r29">;
+def R30 : PTXReg<"r30">;
+def R31 : PTXReg<"r31">;
+def R32 : PTXReg<"r32">;
+def R33 : PTXReg<"r33">;
+def R34 : PTXReg<"r34">;
+def R35 : PTXReg<"r35">;
+def R36 : PTXReg<"r36">;
+def R37 : PTXReg<"r37">;
+def R38 : PTXReg<"r38">;
+def R39 : PTXReg<"r39">;
+def R40 : PTXReg<"r40">;
+def R41 : PTXReg<"r41">;
+def R42 : PTXReg<"r42">;
+def R43 : PTXReg<"r43">;
+def R44 : PTXReg<"r44">;
+def R45 : PTXReg<"r45">;
+def R46 : PTXReg<"r46">;
+def R47 : PTXReg<"r47">;
+def R48 : PTXReg<"r48">;
+def R49 : PTXReg<"r49">;
+def R50 : PTXReg<"r50">;
+def R51 : PTXReg<"r51">;
+def R52 : PTXReg<"r52">;
+def R53 : PTXReg<"r53">;
+def R54 : PTXReg<"r54">;
+def R55 : PTXReg<"r55">;
+def R56 : PTXReg<"r56">;
+def R57 : PTXReg<"r57">;
+def R58 : PTXReg<"r58">;
+def R59 : PTXReg<"r59">;
+def R60 : PTXReg<"r60">;
+def R61 : PTXReg<"r61">;
+def R62 : PTXReg<"r62">;
+def R63 : PTXReg<"r63">;
+def R64 : PTXReg<"r64">;
+def R65 : PTXReg<"r65">;
+def R66 : PTXReg<"r66">;
+def R67 : PTXReg<"r67">;
+def R68 : PTXReg<"r68">;
+def R69 : PTXReg<"r69">;
+def R70 : PTXReg<"r70">;
+def R71 : PTXReg<"r71">;
+def R72 : PTXReg<"r72">;
+def R73 : PTXReg<"r73">;
+def R74 : PTXReg<"r74">;
+def R75 : PTXReg<"r75">;
+def R76 : PTXReg<"r76">;
+def R77 : PTXReg<"r77">;
+def R78 : PTXReg<"r78">;
+def R79 : PTXReg<"r79">;
+def R80 : PTXReg<"r80">;
+def R81 : PTXReg<"r81">;
+def R82 : PTXReg<"r82">;
+def R83 : PTXReg<"r83">;
+def R84 : PTXReg<"r84">;
+def R85 : PTXReg<"r85">;
+def R86 : PTXReg<"r86">;
+def R87 : PTXReg<"r87">;
+def R88 : PTXReg<"r88">;
+def R89 : PTXReg<"r89">;
+def R90 : PTXReg<"r90">;
+def R91 : PTXReg<"r91">;
+def R92 : PTXReg<"r92">;
+def R93 : PTXReg<"r93">;
+def R94 : PTXReg<"r94">;
+def R95 : PTXReg<"r95">;
+def R96 : PTXReg<"r96">;
+def R97 : PTXReg<"r97">;
+def R98 : PTXReg<"r98">;
+def R99 : PTXReg<"r99">;
+def R100 : PTXReg<"r100">;
+def R101 : PTXReg<"r101">;
+def R102 : PTXReg<"r102">;
+def R103 : PTXReg<"r103">;
+def R104 : PTXReg<"r104">;
+def R105 : PTXReg<"r105">;
+def R106 : PTXReg<"r106">;
+def R107 : PTXReg<"r107">;
+def R108 : PTXReg<"r108">;
+def R109 : PTXReg<"r109">;
+def R110 : PTXReg<"r110">;
+def R111 : PTXReg<"r111">;
+def R112 : PTXReg<"r112">;
+def R113 : PTXReg<"r113">;
+def R114 : PTXReg<"r114">;
+def R115 : PTXReg<"r115">;
+def R116 : PTXReg<"r116">;
+def R117 : PTXReg<"r117">;
+def R118 : PTXReg<"r118">;
+def R119 : PTXReg<"r119">;
+def R120 : PTXReg<"r120">;
+def R121 : PTXReg<"r121">;
+def R122 : PTXReg<"r122">;
+def R123 : PTXReg<"r123">;
+def R124 : PTXReg<"r124">;
+def R125 : PTXReg<"r125">;
+def R126 : PTXReg<"r126">;
+def R127 : PTXReg<"r127">;
+
+///===- 64-Bit Registers --------------------------------------------------===//
+
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+def RD32 : PTXReg<"rd32">;
+def RD33 : PTXReg<"rd33">;
+def RD34 : PTXReg<"rd34">;
+def RD35 : PTXReg<"rd35">;
+def RD36 : PTXReg<"rd36">;
+def RD37 : PTXReg<"rd37">;
+def RD38 : PTXReg<"rd38">;
+def RD39 : PTXReg<"rd39">;
+def RD40 : PTXReg<"rd40">;
+def RD41 : PTXReg<"rd41">;
+def RD42 : PTXReg<"rd42">;
+def RD43 : PTXReg<"rd43">;
+def RD44 : PTXReg<"rd44">;
+def RD45 : PTXReg<"rd45">;
+def RD46 : PTXReg<"rd46">;
+def RD47 : PTXReg<"rd47">;
+def RD48 : PTXReg<"rd48">;
+def RD49 : PTXReg<"rd49">;
+def RD50 : PTXReg<"rd50">;
+def RD51 : PTXReg<"rd51">;
+def RD52 : PTXReg<"rd52">;
+def RD53 : PTXReg<"rd53">;
+def RD54 : PTXReg<"rd54">;
+def RD55 : PTXReg<"rd55">;
+def RD56 : PTXReg<"rd56">;
+def RD57 : PTXReg<"rd57">;
+def RD58 : PTXReg<"rd58">;
+def RD59 : PTXReg<"rd59">;
+def RD60 : PTXReg<"rd60">;
+def RD61 : PTXReg<"rd61">;
+def RD62 : PTXReg<"rd62">;
+def RD63 : PTXReg<"rd63">;
+def RD64 : PTXReg<"rd64">;
+def RD65 : PTXReg<"rd65">;
+def RD66 : PTXReg<"rd66">;
+def RD67 : PTXReg<"rd67">;
+def RD68 : PTXReg<"rd68">;
+def RD69 : PTXReg<"rd69">;
+def RD70 : PTXReg<"rd70">;
+def RD71 : PTXReg<"rd71">;
+def RD72 : PTXReg<"rd72">;
+def RD73 : PTXReg<"rd73">;
+def RD74 : PTXReg<"rd74">;
+def RD75 : PTXReg<"rd75">;
+def RD76 : PTXReg<"rd76">;
+def RD77 : PTXReg<"rd77">;
+def RD78 : PTXReg<"rd78">;
+def RD79 : PTXReg<"rd79">;
+def RD80 : PTXReg<"rd80">;
+def RD81 : PTXReg<"rd81">;
+def RD82 : PTXReg<"rd82">;
+def RD83 : PTXReg<"rd83">;
+def RD84 : PTXReg<"rd84">;
+def RD85 : PTXReg<"rd85">;
+def RD86 : PTXReg<"rd86">;
+def RD87 : PTXReg<"rd87">;
+def RD88 : PTXReg<"rd88">;
+def RD89 : PTXReg<"rd89">;
+def RD90 : PTXReg<"rd90">;
+def RD91 : PTXReg<"rd91">;
+def RD92 : PTXReg<"rd92">;
+def RD93 : PTXReg<"rd93">;
+def RD94 : PTXReg<"rd94">;
+def RD95 : PTXReg<"rd95">;
+def RD96 : PTXReg<"rd96">;
+def RD97 : PTXReg<"rd97">;
+def RD98 : PTXReg<"rd98">;
+def RD99 : PTXReg<"rd99">;
+def RD100 : PTXReg<"rd100">;
+def RD101 : PTXReg<"rd101">;
+def RD102 : PTXReg<"rd102">;
+def RD103 : PTXReg<"rd103">;
+def RD104 : PTXReg<"rd104">;
+def RD105 : PTXReg<"rd105">;
+def RD106 : PTXReg<"rd106">;
+def RD107 : PTXReg<"rd107">;
+def RD108 : PTXReg<"rd108">;
+def RD109 : PTXReg<"rd109">;
+def RD110 : PTXReg<"rd110">;
+def RD111 : PTXReg<"rd111">;
+def RD112 : PTXReg<"rd112">;
+def RD113 : PTXReg<"rd113">;
+def RD114 : PTXReg<"rd114">;
+def RD115 : PTXReg<"rd115">;
+def RD116 : PTXReg<"rd116">;
+def RD117 : PTXReg<"rd117">;
+def RD118 : PTXReg<"rd118">;
+def RD119 : PTXReg<"rd119">;
+def RD120 : PTXReg<"rd120">;
+def RD121 : PTXReg<"rd121">;
+def RD122 : PTXReg<"rd122">;
+def RD123 : PTXReg<"rd123">;
+def RD124 : PTXReg<"rd124">;
+def RD125 : PTXReg<"rd125">;
+def RD126 : PTXReg<"rd126">;
+def RD127 : PTXReg<"rd127">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
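+// Note that the 32-bit (R0-R127) and 64-bit (RD0-RD127) registers are shared
+// between the integer and floating-point register classes below.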
+def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 000000000000..8ec646e46f68
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,66 @@
+//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+#include "PTX.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PTXGenSubtargetInfo(TT, CPU, FS),
+ PTXTarget(PTX_COMPUTE_1_0),
+ PTXVersion(PTX_VERSION_2_0),
+ SupportsDouble(false),
+ SupportsFMA(true),
+ Is64Bit(is64Bit) {
+ std::string TARGET = CPU;
+ if (TARGET.empty())
+ TARGET = "generic";
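+  // Let the tblgen-generated feature parser refine the defaults above from
+  // the CPU name and feature string.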
+ ParseSubtargetFeatures(TARGET, FS);
+}
+
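+// Returns the architecture string used when emitting the PTX ".target"
+// directive, e.g. "sm_20" for PTX_SM_2_0.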
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXTarget) {
+ default: llvm_unreachable("Unknown PTX target");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_1: return "sm_11";
+ case PTX_SM_1_2: return "sm_12";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ case PTX_SM_2_1: return "sm_21";
+ case PTX_SM_2_2: return "sm_22";
+ case PTX_SM_2_3: return "sm_23";
+ case PTX_COMPUTE_1_0: return "compute_10";
+ case PTX_COMPUTE_1_1: return "compute_11";
+ case PTX_COMPUTE_1_2: return "compute_12";
+ case PTX_COMPUTE_1_3: return "compute_13";
+ case PTX_COMPUTE_2_0: return "compute_20";
+ }
+}
+
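+// Returns the language version string used when emitting the PTX ".version"
+// directive, e.g. "2.1" for PTX_VERSION_2_1.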
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ case PTX_VERSION_2_2: return "2.2";
+ case PTX_VERSION_2_3: return "2.3";
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 000000000000..0921f1f22c49
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,121 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class PTXSubtarget : public PTXGenSubtargetInfo {
+ public:
+
+ /**
+ * Enumeration of Shader Models supported by the back-end.
+ */
+ enum PTXTargetEnum {
+ PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+ PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+ PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+ PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+ PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+ PTX_LAST_COMPUTE,
+
+ PTX_SM_1_0, /*< Shader Model 1.0 */
+ PTX_SM_1_1, /*< Shader Model 1.1 */
+ PTX_SM_1_2, /*< Shader Model 1.2 */
+ PTX_SM_1_3, /*< Shader Model 1.3 */
+ PTX_SM_2_0, /*< Shader Model 2.0 */
+ PTX_SM_2_1, /*< Shader Model 2.1 */
+ PTX_SM_2_2, /*< Shader Model 2.2 */
+ PTX_SM_2_3, /*< Shader Model 2.3 */
+ PTX_LAST_SM
+ };
+
+ /**
+ * Enumeration of PTX versions supported by the back-end.
+ *
+ * Currently, PTX 2.0 is the minimum supported version.
+ */
+ enum PTXVersionEnum {
+ PTX_VERSION_2_0, /*< PTX Version 2.0 */
+ PTX_VERSION_2_1, /*< PTX Version 2.1 */
+ PTX_VERSION_2_2, /*< PTX Version 2.2 */
+ PTX_VERSION_2_3 /*< PTX Version 2.3 */
+ };
+
+ private:
+
+ /// Shader Model supported on the target GPU.
+ PTXTargetEnum PTXTarget;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ // The native .f64 type is supported on the hardware.
+ bool SupportsDouble;
+
+  // Support for the fused multiply-add (FMA) and multiply-add (MAD)
+  // instructions.
+ bool SupportsFMA;
+
+ // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit;
+
+ public:
+
+ PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ // Target architecture accessors
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ bool supportsFMA() const { return SupportsFMA; }
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+ bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
+ bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
+
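+    // The enumerators above are declared in increasing order, so a check of
+    // the form "X <= PTXTarget < PTX_LAST_*" means "target is at least X".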
+ bool fdivNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool fmadNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool useParamSpaceForDeviceArgs() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 000000000000..ab926e02d66f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,87 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI,
+ MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE,
+ TargetAsmBackend *TAB,
+ bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+
+ RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+ RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+ TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+ const char* DataLayout32 =
+ "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+ const char* DataLayout64 =
+ "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+ Subtarget(TT, CPU, FS, is64Bit),
+ FrameLowering(Subtarget),
+ InstrInfo(*this),
+ TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& CPU,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, CPU, FS, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& CPU,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, CPU, FS, true) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPTXISelDag(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must run after register allocation!
+ PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 000000000000..ae4215325211
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,77 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+ private:
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXTargetLowering TLInfo;
+
+ public:
+ PTXTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo(); }
+
+ virtual const PTXTargetLowering *getTargetLowering() const {
+ return &TLInfo; }
+
+ virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX32TargetMachine(const Target &T, const std::string &TT,
+ const std::string& CPU, const std::string& FS);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX64TargetMachine(const Target &T, const std::string &TT,
+ const std::string& CPU, const std::string& FS);
+}; // class PTX64TargetMachine
+
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..4b09cf5ce099
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPTXInfo
+ PTXTargetInfo.cpp
+ )
+
+add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
new file mode 100644
index 000000000000..8619785889aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXInfo
+
+# Hack: we need to include the 'main' target directory to grab private headers.
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 000000000000..9df6c7567bd1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+ // see llvm/ADT/Triple.h
+ RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+ "PTX (32-bit) [Experimental]");
+ RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+ "PTX (64-bit) [Experimental]");
+}
diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py
new file mode 100755
index 000000000000..15286908961d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/generate-register-td.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+##===- generate-register-td.py --------------------------------*-python-*--===##
+##
+## The LLVM Compiler Infrastructure
+##
+## This file is distributed under the University of Illinois Open Source
+## License. See LICENSE.TXT for details.
+##
+##===----------------------------------------------------------------------===##
+##
+## This file describes the PTX register file generator.
+##
+##===----------------------------------------------------------------------===##
+
+from sys import argv, exit, stdout
+
+
+if len(argv) != 5:
+ print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
+ exit(1)
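+# For example, the checked-in register definitions above (128 registers per
+# class) correspond to:
+#   ./generate-register-td.py 128 128 128 128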
+
+try:
+ num_pred = int(argv[1])
+ num_16bit = int(argv[2])
+ num_32bit = int(argv[3])
+ num_64bit = int(argv[4])
+except ValueError:
+ print('ERROR: Invalid integer parameter')
+ exit(1)
+
+## Print the register definition file
+td_file = open('PTXRegisterInfo.td', 'w')
+
+td_file.write('''
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print predicate registers
+td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
+for r in range(0, num_pred):
+ td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
+
+# Print 16-bit registers
+td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_16bit):
+ td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
+
+# Print 32-bit registers
+td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_32bit):
+ td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
+
+# Print 64-bit registers
+td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_64bit):
+ td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
+
+
+td_file.write('''
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print register classes
+
+td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
+td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
+td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+
+
+td_file.close()
+
+## Now write the PTXCallingConv.td file
+td_file = open('PTXCallingConv.td', 'w')
+
+# Reserve 10% of the available registers for return values, and the other 90%
+# for parameters
+num_ret_pred = int(0.1 * num_pred)
+num_ret_16bit = int(0.1 * num_16bit)
+num_ret_32bit = int(0.1 * num_32bit)
+num_ret_64bit = int(0.1 * num_64bit)
+num_param_pred = num_pred - num_ret_pred
+num_param_16bit = num_16bit - num_ret_16bit
+num_param_32bit = num_32bit - num_ret_32bit
+num_param_64bit = num_64bit - num_ret_64bit
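+# For example, with 128 registers per class int(0.1 * 128) == 12, so P0-P11
+# (and RH0-RH11, R0-R11, RD0-RD11) hold return values while the remaining 116
+# registers of each class are used for parameters.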
+
+param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
+ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
+param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
+ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
+param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
+ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
+param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
+ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
+
+# Use str.join rather than reduce so the script also runs under Python 3,
+# where reduce is no longer a builtin.
+param_list_pred = ', '.join(param_regs_pred)
+ret_list_pred = ', '.join(ret_regs_pred)
+param_list_16bit = ', '.join(param_regs_16bit)
+ret_list_16bit = ', '.join(ret_regs_16bit)
+param_list_32bit = ', '.join(param_regs_32bit)
+ret_list_32bit = ', '.join(ret_regs_32bit)
+param_list_64bit = ', '.join(param_regs_64bit)
+ret_list_64bit = ', '.join(ret_regs_64bit)
+
+td_file.write('''
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
+ ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
+
+
+td_file.close()
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..389ea7742b06
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmPrinter
+ PPCInstPrinter.cpp
+ )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
new file mode 100644
index 000000000000..f097e84248ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include the 'main' powerpc target directory to grab private headers.
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..1a9bd761359c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,295 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ // Check for slwi/srwi mnemonics.
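+  //   slwi RA, RS, n  ==  rlwinm RA, RS, n, 0, 31-n
+  //   srwi RA, RS, n  ==  rlwinm RA, RS, 32-n, n, 31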
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ default: assert(0 && "Invalid predicate");
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ char Value = MI->getOperand(OpNo).getImm();
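+  // Sign-extend the low 5 bits; the shifts below act on the int-promoted value.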
+ Value = (Value << (32-5)) >> (32-5);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
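+// The immediate is stored as a word offset (value >> 2), so scale it back up
+// by four when printing.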
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ O << "$+";
+ printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
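+// Prints the one-hot CR field mask for a condition register (CR0 -> 0x80,
+// CR7 -> 0x01), as used by instructions such as mtcrf.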
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: assert(0 && "Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
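+  // As in printMemRegReg below, r0 used as a base register means a literal 0,
+  // so print "0" rather than the register name.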
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo, O);
+ else
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+    // The Linux and AIX assemblers do not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "lo16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode ha16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "ha16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 000000000000..d022a4496e84
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,71 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ // 0 -> AIX, 1 -> Darwin.
+ unsigned SyntaxVariant;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+
+ bool isDarwinSyntax() const {
+ return SyntaxVariant == 1;
+ }
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+
+
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // FIXME: Remove
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..a1b81662115a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPowerPCDesc
+ PPCMCTargetDesc.cpp
+ PPCMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..9db66622cced
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDesc
+
+# Hack: we need to include the 'main' target directory to grab private headers.
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 000000000000..b6dca835b18d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,62 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MCAsmInfoDarwin properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+using namespace llvm;
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
+ PCSymbol = ".";
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+  SupportsDebugInformation = true; // Debug information.
+}
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!is64Bit)
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ HasLCOMMDirective = true;
+ AssemblerDialect = 0; // Old-Style mnemonics.
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 000000000000..96ae6fbba0e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCAsmInfoDarwin class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+ struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ };
+
+ struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 000000000000..02b887f4d5dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,70 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "PPCMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+ if (TheTriple.isOSDarwin())
+ return new PPCMCAsmInfoDarwin(isPPC64);
+ return new PPCLinuxMCAsmInfo(isPPC64);
+
+}
+
+extern "C" void LLVMInitializePowerPCMCAsmInfo() {
+ RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 000000000000..cee235097a0a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 000000000000..7191dd105f3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,85 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+ class JITCodeEmitter;
+ class Target;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+ class MCCodeEmitter;
+ class MCContext;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
+ class TargetMachine;
+ class TargetAsmBackend;
+
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+ TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
+
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB = 1,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 4, MO_HA16 = 8,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 16,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 32,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 64
+ };
+ } // end namespace PPCII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 000000000000..aabf494012e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,112 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyWriters = [PPCAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
new file mode 100644
index 000000000000..4b8cbb711833
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -0,0 +1,123 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "PPC.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {}
+};
+
+class PPCAsmBackend : public TargetAsmBackend {
+const Target &TheTarget;
+public:
+ PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_lo16", 16, 16, 0 },
+ { "fixup_ppc_ha16", 16, 16, 0 },
+ { "fixup_ppc_lo14", 16, 14, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME.
+ return false;
+ }
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ }
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // FIXME: Zero fill for now. That's not right, but at least will get the
+ // section size right.
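+    // (A proper implementation would emit PowerPC nops: "ori 0, 0, 0",
+    // i.e. the 32-bit word 0x60000000.)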
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write8(0);
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ assert(0 && "UNIMP");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createMachObjectWriter(new PPCMachObjectWriter(
+ /*Is64Bit=*/is64,
+ (is64 ? object::mach::CTM_PowerPC64 :
+ object::mach::CTM_PowerPC),
+ object::mach::CSPPC_ALL),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+} // end anonymous namespace
+
+
+
+
+TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const std::string &TT) {
+ if (Triple(TT).isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..9de2200296e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,696 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "InstPrinter/PPCInstPrinter.h"
+using namespace llvm;
+
+namespace {
+ class PPCAsmPrinter : public AsmPrinter {
+ protected:
+ DenseMap<MCSymbol*, MCSymbol*> TOC;
+ const PPCSubtarget &Subtarget;
+ uint64_t TOCLabelID;
+ public:
+ explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitFunctionEntryLabel();
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M);
+ void EmitStartOfAsmFile(Module &M);
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux assembly output.
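+/// For example, "r3" becomes "3", "f2" becomes "2", and "cr7" becomes "7".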
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    // The Linux assembler (and possibly other assemblers) does not accept
+    // register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ return;
+ case MachineOperand::MO_ExternalSymbol: {
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ }
+
+ MCSymbol *NLPSym =
+ OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
+ MO.getSymbolName()+"$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
+
+ O << *NLPSym;
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>()
+ .getGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().
+ getHiddenGVStubEntry(SymToPrint);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+ } else {
+ SymToPrint = Mang->getSymbol(GV);
+ }
+
+ O << *SymToPrint;
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE: {
+ if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+ SmallString<32> Str;
+ raw_svector_ostream O(Str);
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+    // Cast away const; DIVariable and the other DebugInfo wrappers do not
+    // take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+
+
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+ Create(PICBase, OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
+ }
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
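+    // Illustratively, "%X3 = LDtoc <ga:@min1>, %X2" ends up as an 'ld' of the
+    // TOC-entry label relative to X2 (the TOC pointer); doFinalization() then
+    // emits the matching ".tc" entry for that label in the ".toc" section.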
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isGlobal());
+
+ // Map symbol -> label of TOC entry.
+ MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+ if (TOCEntry == 0)
+ TOCEntry = GetTempSymbol("C", TOCLabelID++);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case PPC::MFCRpseud:
+ // Transform: %R3 = MFCRpseud %CR7
+ // Into: %R3 = MFCR ;; cr7
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
+ // FIXME 64-bit SVR4: Use MCSection here!
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
+ OutStreamer.EmitRawText(StringRef("\t.align 3"));
+ OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
+ ",.TOC.@tocbase");
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
+ OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ // FIXME 64-bit SVR4: Use MCSection here?
+ OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+
+    // FIXME: This is nondeterministic!
+ for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer.EmitLabel(I->second);
+ OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
+ "[TC]," + I->first->getName());
+ }
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ // FIXME: This is a total hack, finish mc'izing the PPC backend.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+  // Prime the text sections so they are adjacent. This reduces the likelihood
+  // that a large data or debug section will cause a branch to exceed the 16M
+  // limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
+ TmpStr += "$lazy_ptr";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
+ TmpStr += "$tmp";
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ // FIXME: MCize this.
+ OutStreamer.EmitRawText(StringRef("\tmflr r0"));
+ OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+ OutStreamer.EmitLabel(AnonSymbol);
+ OutStreamer.EmitRawText(StringRef("\tmflr r11"));
+ OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
+ "-" + AnonSymbol->getName() + ")");
+ OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
+
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ "-" + AnonSymbol->getName() + ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
+ if (isPPC64)
+ OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ else
+ OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+ ")(r11)");
+ OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+ OutStreamer.EmitRawText(StringRef("\tbctr"));
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ if (isPPC64)
+ OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+ }
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (*I) {
+ MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// target assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(tm, Streamer);
+ return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 000000000000..e161d23600e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,174 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: one that computes basic
+// block positions, and one that converts branch pseudo ops into machine
+// branch opcodes. This pass should be run last, just before the assembly
+// printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+  // If the entire function fits within the range of a conditional-branch
+  // displacement field, we know we don't need to expand any branches in this
+  // function. This is a common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
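+            // Illustrative example: if Dest is block 2, this is block 5, blocks
+            // 2-4 are 100, 200 and 300 bytes, and this branch is 40 bytes into
+            // block 5, then BranchSize = 40 + 100 + 200 + 300 = 640.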
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+          // The new sequence is 8 bytes where the old branch was 4 bytes, so
+          // grow the block size by 4 and remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 000000000000..441db94581ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,132 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+  // Common sub-targets pass FP values in F1 - F13.
+ CCIfType<[f32, f64],
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC_SVR4_Common : CallingConv<[
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
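+  // For example, the two halves of a split i64 may land in R3/R4, R5/R6,
+  // R7/R8 or R9/R10, but never straddle a pair such as R4/R5.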
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention always puts vector arguments on the stack. It is
+// used to assign vector arguments that belong to the variable portion of the
+// parameter list of a variadic function.
+def CC_PPC_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
+// vector arguments in vector registers before putting them on the stack.
+def CC_PPC_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the caller's stack frame.
+//
+// Still, the address of the aggregate copy in the caller's stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC_SVR4 calling convention.
+//
+// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+]>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 000000000000..42232a07535b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,261 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+ public:
+
+ PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+}
+
+char PPCCodeEmitter::ID = 0;
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new PPCCodeEmitter(TM, JCE);
+}
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+ }
+}
+
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
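+  // e.g. CR0 encodes as 0x80 and CR7 as 0x01 in the resulting field mask.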
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
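+  // Worked example (illustrative numbers): with the 'bl' at 0x100 and &gv at
+  // 0x500, the field is pre-loaded with -0x100-4 = -0x104; the relocation adds
+  // &gv, leaving 0x3FC; at run time LR holds 0x104 (the PIC base, just past
+  // the 'bl'), and 0x3FC + 0x104 = 0x500 = &gv.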
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
+
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
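+  // Illustrative layout of the returned value: bits 20-16 hold the base
+  // register number and bits 15-0 hold the 16-bit displacement, so a base of
+  // r1 with displacement 8 yields (1 << 16) | 0x0008.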
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
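+  // Illustrative layout: bits 18-14 hold the base register number and bits
+  // 13-0 hold the 14-bit displacement field.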
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
new file mode 100644
index 000000000000..b3c889e3f8da
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
@@ -0,0 +1,45 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+  /// fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like
+  /// 'b' and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'li'.
+ fixup_ppc_lo16,
+
+ /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+ /// like 'lis'.
+ fixup_ppc_ha16,
+
+ /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'std'.
+ fixup_ppc_lo14,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 000000000000..375e000fe401
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,978 @@
+//===-- PPCFrameLowering.cpp - PPC Frame Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment. This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta. You don't want to do that anyway; a better approach is to
+// reserve another register that retains the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+    // If the block ends in a return instruction, it contains an epilog to scan.
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
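+  // Compute a VRSAVE-style mask: bit (31-i) is set when vector register Vi is
+  // used; e.g. a function using only V0 and V1 yields a mask of 0xC0000000.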
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+  // Live-in and live-out values must already be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1;
+
+  // If this is a leaf function that uses at most 224 bytes of stack space and
+  // has no frame pointer, no calls, and no dynamic allocas, then we do not
+  // need to adjust the stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
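+  // (e.g. with a 16-byte TargetAlign, a 100-byte frame is rounded up to 112).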
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+  // early, before the stack layout is calculated, and can thus return different
+  // answers depending on when it is called.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return false;
+
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
+ (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Prepare for frame info.
+ MCSymbol *FrameLabel = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ // FIXME: determineFrameLayout() may change the frame size. This should be
+  // moved higher up, into some hook.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
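+ // Three cases follow: dynamic realignment of R1 via STWUX/STDUX, a small
+ // frame handled with a single STWU/STDU, and a large frame whose size is
+ // first materialized in R0 with LIS/ORI.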
+ if (!isPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
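+ // These entries are later emitted as DWARF call-frame information, so that
+ // the unwinder can find the CFA and the saved LR/FP at each prologue label.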
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Show update of SP.
+ if (NegFrameSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ }
+ }
+
+ MCSymbol *ReadyLabel = 0;
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!isPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (needsFrameMoves) {
+ ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (Reg == PPC::CR2LT || Reg == PPC::CR2GT || Reg == PPC::CR2EQ)
+ continue;
+ if (Reg == PPC::CR2UN)
+ Reg = PPC::CR2;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be non-negative");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!isPPC64) {
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if(FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI())
+ return;
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasCRSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+// FIXME SVR4: Disable CR save area for now.
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
+// HasCRSaveArea = true;
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
+ PPCRegisterInfo::getRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // The CR save area is below the general register save area.
+ if (HasCRSaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the CR/CRBIT register class?
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding, we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 000000000000..0c18de1e2e26
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,322 @@
+//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC declaration of the TargetFrameLowering class.
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+ class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ }
+
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool needsFP(const MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? 16 : 8;
+ // SVR4 ABI:
+ return isPPC64 ? 16 : 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (isDarwinABI)
+ return isPPC64 ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -8U : -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
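+ /// (On Darwin and 64-bit SVR4 this is six pointer-sized slots, conventionally
+ /// the back chain, saved CR, saved LR, two reserved slots and the saved TOC.)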
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
+
+ // 32-bit SVR4 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
+ }
+
+ // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return 0;
+ }
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+ // mode?
+ {PPC::R31, -4},
+ {PPC::R30, -12},
+ {PPC::R29, -20},
+ {PPC::R28, -28},
+ {PPC::R27, -36},
+ {PPC::R26, -44},
+ {PPC::R25, -52},
+ {PPC::R24, -60},
+ {PPC::R23, -68},
+ {PPC::R22, -76},
+ {PPC::R21, -84},
+ {PPC::R20, -92},
+ {PPC::R19, -100},
+ {PPC::R18, -108},
+ {PPC::R17, -116},
+ {PPC::R16, -124},
+ {PPC::R15, -132},
+ {PPC::R14, -140},
+
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 000000000000..cddc9d858adf
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,308 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// branch instruction per-cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and some
+// instructions fill an entire dispatch group. Additionally, only branches can
+// issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
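+// As a rough example, a full dispatch group might be four FXU/LSU/FPU/VALU
+// operations plus one branch in the fifth slot, while a PPC970_Single
+// instruction (such as mtspr) occupies a group by itself.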
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
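+ // Machine nodes encode their target opcode bitwise-complemented (as a
+ // negative value), so a non-negative opcode here is a target-independent
+ // node, which we treat as a pseudo.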
+ if ((int)Opcode >= 0) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode = ~Opcode;
+
+ const MCInstrDesc &MCID = TII.get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+ // Okay, we don't have an exact match; if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion, for example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getZExtValue();
+ int LoadOffs = LoadOffset->getZExtValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group. We return NoopHazard for any instruction that
+/// would not terminate the dispatch group but would cause a pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ case PPC::LVXL:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ case PPC::STVXL:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 000000000000..2f81f0f7c7f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automata that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group),
+/// or storing then loading from the same address within a dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+ // StorePtr1/StorePtr2 - Keep track of the address of any store. If we see a
+ // load from the same address (or one that aliases it), force it into a new
+ // dispatch group with a noop. We can have up to four stores in one dispatch
+ // group, hence we track up to 4.
+ //
+ // NumStores is zero if we haven't seen a store yet. We keep track of both
+ // operands of each store here, since we support [r+r] and [r+i] addressing.
+ SDValue StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDValue Ptr1, SDValue Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2176c02c8503
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1087 @@
+//===-- PPCISelDAGToDAG.cpp - PPC pattern matching inst selector ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ InsertVRSaveCode(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+ /// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ Out = N;
+ return true;
+ }
+
+ /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which is preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
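+/// (The base is set up by MovePCtoLR, a pseudo that branches-and-links to the
+/// next instruction so LR picks up the current PC, followed by an MFLR into
+/// the virtual base register, as emitted below.)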
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and sets Imm
+/// to the immediate value.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node has a specific
+// opcode and an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
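+// For example, a mask of 0x00000FF0 (bits 20..27 when numbered from the MSB,
+// as the PPC rotate-and-mask instructions do) yields MB = 20 and ME = 27.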
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
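+ /// Illustrative example: (or (and x, 0xFFFFFF00), (and y, 0x000000FF)) can be
+ /// selected here as rlwimi with SH = 0, MB = 24, ME = 31, inserting the low
+ /// byte of y into the masked x.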
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
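+ // Explanatory note: cntlzw returns 32 only when Op is zero, and the rlwinm
+ // below (SH=27, MB=5, ME=31) is a logical shift right by 5, so the result
+ // is 1 iff Op == 0.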
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
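+ // Explanatory note: addic Op, -1 sets the carry iff Op != 0, and the subfe
+ // below produces exactly that carry bit, so the result is 1 iff Op != 0.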
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
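+ // Illustrative example: the i64 constant 0x123456789ABC is materialized by
+ // the code below as li 0x1234; rldicr ,,32,31; oris ,,0x5678; ori ,,0x9ABC.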
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(N);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.isGigaProcessor())
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
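+ // Illustrative example: sdiv x, 4 becomes srawi tmp, x, 2 followed by addze,
+ // which adds back the carry set when a negative value shifted out nonzero
+ // bits, rounding the quotient toward zero; for sdiv x, -4 the result is
+ // additionally negated.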
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ llvm_unreachable("R+R preindex loads not supported yet!");
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
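+ // Illustrative example: (and (srl x, 8), 0xFF) is selected here as
+ // rlwinm dst, x, 24, 24, 31.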
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..9741a3902af7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,5768 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->isDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new TargetLoweringObjectFileELF();
+}
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc loads and stores.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // Expand FSQRT unless the subtarget provides a hardware square root instruction.
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ setMinFunctionAlignment(2);
+ if (PPCSubTarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ computeRegisterProperties();
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ const TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on 4 byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME SVR4 TBD
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
+ case PPCISD::LOAD: return "PPCISD::LOAD";
+ case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
+ case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
+ case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ }
+}
+
+MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
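+ /// Illustrative example: the v16i8 mask <4,5,6,...,18,19> is a vsldoi of the
+ /// two inputs by 4 bytes, so this returns 4.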
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
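+ /// Illustrative example: with EltSize == 4, the mask
+ /// <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7> splats word element 1 of the first input.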
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
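+ /// Illustrative example: a v16i8 build_vector of sixteen 5's with
+ /// ByteSize == 1 returns a target constant 5 (i.e. vspltisb 5); an all-zero
+ /// vector deliberately returns SDValue() so that vxor can be used instead.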
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only case whose replicated bits would fit into our
+ // immediate field is zero, and we prefer to use vxor for that.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+ /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+ /// or 64-bit immediate, and if the value can be accurately represented as a
+ /// sign extension from a 16-bit value. If so, this returns true and sets Imm
+ /// to the sign-extended value.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i; fold the immediate if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
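+ // For example, (X & ~3) | 2 can be selected as (X & ~3) + 2, since the set
+ // bits cannot overlap and therefore cannot carry.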
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
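+/// (DS-form memory instructions such as STD require the low two bits of the
+/// displacement to be zero; the encoded immediate is the byte offset / 4.)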
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - Returns true by value, and sets the base
+/// pointer, offset pointer, and addressing mode by reference, if the node's
+/// address can be legally represented as a pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ HiOpFlags = PPCII::MO_HA16;
+ LoOpFlags = PPCII::MO_LO16;
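+ // ha16(x) is the high 16 bits adjusted for the sign of lo16(x), so that
+ // (ha16(x) << 16) plus the sign-extended lo16(x) reconstructs x.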
+
+ // Don't use the PIC base if we are not in the PIC relocation model, or if we
+ // are on a non-Darwin platform; we don't support PIC on other platforms yet.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<PPCSubtarget>().isDarwin();
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ DebugLoc DL = HiPart.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
+ SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ DebugLoc DL = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
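+ // For i32 this becomes (cntlzw X) >> 5: cntlzw returns 32 only for X == 0,
+ // so the shifted result is 1 exactly when X == 0 and 0 otherwise.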
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index >= 8 (all eight registers used)
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
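+ // (the eight saved GPRs occupy the first 8 * 4 == 32 bytes of the area)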
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr index >= 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ const Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ *DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
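+ // With 32-bit pointers the fields land at byte offsets 0 (gpr), 1 (fpr),
+ // 4 (overflow_arg_area) and 8 (reg_save_area), which matches the offsets
+ // computed below.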
+
+
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
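+ // For example, if only R3 has been allocated (RegNum == 1), R4 is skipped so
+ // that the next register pair starts at R5/R6.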
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments
+/// on Darwin.
+static const unsigned *GetFPR() {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
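+ // Round the argument size up to a multiple of the pointer size.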
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i32:
+ RC = PPC::GPRCRegisterClass;
+ break;
+ case MVT::f32:
+ RC = PPC::F4RCRegisterClass;
+ break;
+ case MVT::f64:
+ RC = PPC::F8RCRegisterClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ RC = PPC::VRRCRegisterClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(false, false));
+
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+
+ FI->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const unsigned GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const unsigned FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ ObjSize==1 ? MVT::i8 : MVT::i16,
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be the address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isVarArg,
+ unsigned CC,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumOps = Outs.size();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing byval parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
+
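+ // The LI field of an absolute branch encodes the word-aligned target divided
+ // by 4, so return the address shifted right by two.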
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
+ isDarwinABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc =
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember the argument for later processing and
+/// calculate the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
+/// address stack slots. Returns the chain as result and the loaded values in
+/// LROpOut/FPOpOut. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
+/// specified by "Src" to the address "Dst". The size and alignment
+/// information are taken from the byval parameter attribute flags. The copy
+/// will be passed as a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, MachinePointerInfo(0),
+ MachinePointerInfo(0));
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ DebugLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ const PPCSubtarget &PPCSubTarget) {
+
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(),
+ 0, OpFlags);
+ needIndirectCall = false;
+ }
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
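+ // (The entry point is at offset 0, the TOC base at offset 8 and the
+ // environment pointer at offset 16 of the descriptor.)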
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // All those operations are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // operations together, a TOC access in the caller could be scheduled
+ // between the load of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
+ InFlag.getNode() ? 3 : 2);
+ Chain = LoadFuncPtr.getValue(1);
+ InFlag = LoadFuncPtr.getValue(2);
+
+ // Load environment pointer into r11.
+ // Offset of the environment pointer within the function descriptor.
+ SDValue PtrOff = DAG.getIntPtrConstant(16);
+
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
+ InFlag);
+ Chain = LoadEnvPtr.getValue(1);
+ InFlag = LoadEnvPtr.getValue(2);
+
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+
+ // Load TOC of the callee into r2. We are using a target-specific load
+ // with r2 hard coded, because the result of a target-independent load
+ // would never go directly into r2, since r2 is a reserved register (which
+ // prevents the register allocator from allocating it), resulting in an
+ // additional register being allocated and an unnecessary move instruction
+ // being generated.
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
+ Callee, InFlag);
+ Chain = LoadTOCPtr.getValue(0);
+ InFlag = LoadTOCPtr.getValue(1);
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ Callee.setNode(0);
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ return CallOpc;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VT, InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const {
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+ isTailCall, RegsToPass, Ops, NodeTys,
+ PPCSubTarget);
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ // If this is the first return lowered for this function, add the regs
+ // to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
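+ // In other words, the emitted sequence is 'bl <callee>' followed by 'nop',
+ // and the linker may rewrite the nop into a TOC-restoring load such as
+ // 'ld r2, 40(r1)'.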
+ if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (CallOpc == PPCISD::BCTRL_SVR4) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // Otherwise insert NOP.
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a tail call. As a
+ // consequence the frame pointer will be used for dynamic allocations and
+ // for restoring the caller's stack pointer in this function's epilog. This
+ // is done because the tail-called function might overwrite the value in
+ // this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // variable space where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Set CR bit 6 (CR1EQ) to true if this is a vararg call.
+ if (isVarArg) {
+ SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a tail call. As a
+ // consequence the frame pointer will be used for dynamic allocations and
+ // for restoring the caller's stack pointer in this function's epilog. This
+ // is done because the tail-called function might overwrite the value in
+ // this function's (MF) stack pointer stack slot 0(SP).
+ if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
+ Outs, OutVals,
+ nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR();
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
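+ // No registers are left; the rest of the object stays in the memory
+ // copy made above. Round the remaining size up to a multiple of the
+ // pointer size and skip past it.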
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the '...'. We do it for all
+ // arguments; it seems to work.
+ while (ArgOffset % 16 != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip the first 12 params, which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ SDValue PtrOff = DAG.getIntPtrConstant(40);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get the current return address save index. The users of this index
+ // include the tail call lowering code above.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out the fixed offset of the return address save area.
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
+ isDarwinABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
+/// when possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
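+ // PPCISD::FSEL computes 'Cmp >= 0.0 ? TV : FV', so each condition below is
+ // rewritten into that form by choosing the comparison value and, where
+ // needed, swapping TV and FV.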
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ PPCISD::FCTIDZ,
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+
+ // Emit a store to the stack slot.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32)
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 8, 8);
+ SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
+ SDValue Store =
+ DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
+ Ops, 3, MVT::i64, MMO);
+ // Load the value as a double.
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of FPSCR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
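+ // A quick check of the formula: FPSCR values 0, 1, 2 and 3 map to 1, 0, 2
+ // and 3 respectively, matching the FLT_ROUNDS encoding above.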
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<EVT> NodeTys;
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Glue); // unused in this context
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
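+ // OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth));
+ // at most one of the last two terms is non-zero because a PPC shift by
+ // BitWidth or more yields zero.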
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const EVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
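+ // For example, SplatBits == 0xFE with SplatBitSize == 8 sign-extends to
+ // SextVal == -2, which a single vspltisb can materialize.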
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // If this is 0x7FFF_FFFF x 4, materialize it as not(0x8000_0000), where
+ // 0x8000_0000 is built with vspltisw -1 followed by vslw. This is
+ // important for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make an all-ones vector with vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // Check for the wide variety of 'vsplti N; binop with self' cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000).
+ int i = SplatCsts[idx];
+
+ // Figure out the shift amount AltiVec actually uses when i is the shift
+ // operand at this splat size (only the low bits of i matter).
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd values in [17,31] (everything else in [0,31] was handled above):
+ // (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+ // Odd values in [-31,-17] (everything else in [-31,0] was handled above):
+ // (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
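+ // Rough layout of PFEntry, as decoded above: bits 31-30 hold the cost,
+ // bits 29-26 the operation, and bits 25-13 / 12-0 the LHS/RHS sub-shuffle
+ // IDs. Each 13-bit ID is a base-9 encoding of four element indices (0-7
+ // from the concatenated inputs, 8 for undef), so 0,1,2,3 == (1*9+2)*9+3
+ // means "take LHS as-is" and 4,5,6,7 means "take RHS as-is".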
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
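+ // For example, with v4i32 inputs BytesPerElement is 4, so a mask entry of 5
+ // expands to the byte indices 20, 21, 22 and 23.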
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+ /// altivec comparison. If it is, return true and fill in CompareOpc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
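+ // The CompareOpc values below are the extended-opcode fields of the
+ // corresponding vcmp* instructions; the *_p predicate intrinsics map to the
+ // record ('.') forms, which also update CR6, and isDot records that.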
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
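+ // Note the operand numbering: the predicate (_p) intrinsics carry an extra
+ // leading operand that selects which CR6 bit to return, so their vector
+ // operands sit at positions 2 and 3 rather than 1 and 2.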
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<EVT> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
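+ // The CR6 field lands in bits 4-7 of the MFCR result (LT at bit 7, GT at 6,
+ // EQ at 5, SO at 4, counting from the LSB), so 8-(3-BitNo) is 5 for the EQ
+ // bit and 7 for the LT bit, bringing the chosen bit down to bit 0.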
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
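+ // Going through memory here sidesteps the lack of a direct move from a
+ // scalar register into a vector register on these AltiVec targets.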
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
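+ // Per 32-bit lane, writing x = xh*2^16 + xl and y = yh*2^16 + yl:
+ //   x*y mod 2^32 = xl*yl + ((xh*yl + xl*yh) << 16)
+ // vmulouh below yields xl*yl, and vmsumuhm on LHS and the half-swapped RHS
+ // yields xh*yl + xl*yh, which is then shifted up by 16 and added in.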
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
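+ // The usable low byte of each 16-bit product is the odd-numbered byte
+ // (big-endian), so this mask interleaves bytes 1,3,5,... of EvenParts with
+ // bytes 17,19,... of OddParts.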
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, PPCSubTarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
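+ // FPSCR bits 30-31 form the rounding-mode field; the mtfsb1 31 / mtfsb0 30
+ // pair below sets it to 0b01, i.e. round toward zero, for the add.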
+ std::vector<EVT> NodeTys;
+ SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Glue); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // <binop> tmp, dest, incr
+ // st[wd]cx. tmp, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // true for i8, false for i16
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64-bit mode we have to use 64-bit registers for addresses, even though
+ // the lwarx/stwcx. accesses themselves are 32-bit. With the word-sized
+ // atomics we can use address registers without caring whether they're 32 or
+ // 64 bits, but here we're doing actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
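+ // For example, a byte at the lowest address of its word (big-endian byte 0)
+ // gets shift1 == 0 and shift == 24, so incr and the mask are moved up into
+ // the most-significant byte of the word before the lwarx/stwcx. loop runs.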
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ const TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6
+ // result and a normal vector output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if its flag result has no uses, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<EVT> VTs;
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Glue);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
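+
+// A worked example of the facts recorded above: an i16 PPCISD::LBRX (lhbrx)
+// only fills the low 16 bits, so KnownZero = 0xFFFF0000 can let the generic
+// combiner drop a redundant "and ..., 0xFFFF" after the load; the vcmp*_p
+// predicate intrinsics only ever produce 0 or 1, so everything but bit 0 is
+// known zero and masking or zero-extending their result can fold away.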
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
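+
+// These letters follow the GCC rs6000 machine constraints, so (as a rough,
+// hypothetical example, with x and r as placeholder variables) user code like
+//
+//   asm ("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(16));
+//
+// reaches the 'I' case above: 16 passes the (short)Value == (int)Value check
+// and is emitted as a target constant, while an out-of-range value such as
+// 70000 fails the check and nothing is added to Ops for it.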
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r and r+imm addressing; check the scale.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
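+
+// A few concrete cases of the rules above, as a sketch: "r31 + 8" (a 16-bit
+// immediate) and plain "r3 + r4" are accepted; "r3 + r4 + 8" is rejected
+// (Scale 1 with both a base register and an offset); "2*r3" is accepted only
+// because it can be re-expressed as "r3 + r3"; any larger scale, or a global
+// as the base, is rejected.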
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64 ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
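+
+// For example, __builtin_return_address(1) takes the Depth > 0 path above:
+// it walks one frame via LowerFRAMEADDR and loads the saved LR from that
+// frame at the ABI-specific return-save offset, while depth 0 simply reloads
+// the LR save slot of the current frame, which setLRStoreRequired() keeps
+// from being optimized away.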
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ MFI->getStackSize() &&
+ !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
+ (is31 ? PPC::R31 : PPC::R1);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false, 0);
+ return FrameAddr;
+}
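+
+// For example, __builtin_frame_address(2) starts from R1 (or R31 when a
+// frame pointer is being kept) and performs two chained loads, each one
+// following the back-chain word stored at the start of the previous frame.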
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. If 'NonScalarIntSafe' is true, it is safe to return a
+/// non-scalar-integer type, e.g. for an empty string source, a constant, or
+/// a value loaded from memory. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (this->PPCSubTarget.isPPC64()) {
+ return MVT::i64;
+ } else {
+ return MVT::i32;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 000000000000..986b4e73fbd3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,486 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// an f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ TOC_ENTRY,
+
+ /// The following three target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// Restore the TOC from the TOC save area of the current stack frame.
+ /// This is basically a hard coded load instruction which additionally
+ /// takes/produces a flag.
+ TOC_RESTORE,
+
+ /// Like a regular LOAD but additionally taking/producing a flag.
+ LOAD,
+
+ /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
+ /// a hard coded load instruction.
+ LOAD_TOC,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// CALL - A direct function call.
+ CALL_Darwin, CALL_SVR4,
+
+ /// NOP - Special NOP which follows 64-bit SVR4 calls.
+ NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Darwin, BCTRL_SVR4,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ // The following 5 instructions are used only as part of the
+ // long double-to-int conversion sequence.
+
+ /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+ /// register.
+ MFFS,
+
+ /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+ MTFSB0,
+
+ /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+ MTFSB1,
+
+ /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+ /// rounding towards zero. It has flags added so it won't move past the
+ /// FPSCR-setting instructions.
+ FADDRTZ,
+
+ /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+ MTFSF,
+
+ /// LARX = This corresponds to the PPC l{w|d}arx instruction: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to the PPC stcx. instruction: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1, 2, or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1, 2, or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &PPCSubTarget;
+
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
+ /// offset pointer, and addressing mode by reference, if the node's address
+ /// can be legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero, the destination alignment can satisfy any
+ /// constraint. Similarly, if SrcAlign is zero there is no need to check it
+ /// against an alignment requirement, probably because the source does not
+ /// need to be loaded. If 'NonScalarIntSafe' is true, it is safe to return a
+ /// non-scalar-integer type, e.g. for an empty string source, a constant, or
+ /// a value loaded from memory. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ DebugLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 000000000000..e88ad378ccd9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,749 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+ }
+}
+
+// ELF 64 ABI Calls = Darwin ABI Calls
+// Used to define BL8_ELF and BLA8_ELF
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
+ (BL8_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
+ (BL8_Darwin texternalsym:$dst)>;
+
+def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCnop),
+ (NOP)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+ [(set G8RC:$dst,
+ (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+ [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
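+
+// These pseudos are expanded after selection (usesCustomInserter routes them
+// through EmitInstrWithCustomInserter / EmitAtomicBinary) into a
+// load-reserved / store-conditional retry loop, roughly (register names here
+// are placeholders):
+//
+//   loop:  ldarx   rT, 0, rPtr
+//          add     rD, rT, rIncr      ; or sub/and/or/xor/nand as requested
+//          stdcx.  rD, 0, rPtr
+//          bne-    cr0, loop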
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
+ let isReturn = 1 in {
+ def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+let Defs = [CARRY] in {
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+}
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+}
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+let Defs = [CARRY] in {
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+}
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+let Defs = [CARRY] in {
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+}
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
+ "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+let mayLoad = 1 in
+def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+ "ld 2, 8($reg)", LdStLD,
+ [(PPCload_toc G8RC:$reg)]>, isPPC64;
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
+def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+ "ld 2, 40(1)", LdStLD,
+ [(PPCtoc_restore)]>, isPPC64;
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+def : Pat<(PPCload ixaddr:$src),
+ (LD ixaddr:$src)>;
+def : Pat<(PPCload xaddr:$src),
+ (LDX xaddr:$src)>;
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+let mayStore = 1 in
+def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra G8RC:$rS, GPRC:$rB),
+ (SRAD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(srl G8RC:$rS, GPRC:$rB),
+ (SRD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(shl G8RC:$rS, GPRC:$rB),
+ (SLD G8RC:$rS, GPRC:$rB)>;
+
+// SHL/SRL
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
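+
+// Worked example of the transforms used here: a constant shl by 5 selects
+// (RLDICR $in, 5, 58) since SHL64 yields 63 - 5, and a constant srl by 5
+// selects (RLDICL $in, 59, 5) since SRL64 yields 64 - 5; both are
+// rotate-and-mask forms that clear the vacated bits.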
+
+// ROTL
+def : Pat<(rotl G8RC:$in, GPRC:$sh),
+ (RLDCL G8RC:$in, GPRC:$sh, 0)>;
+def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 G8RC:$in, tblockaddress:$g)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 000000000000..256370fa5f52
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,695 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots' "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
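+// V_immneg0 - Matches a build_vector in which every element is -0.0.  It is
+// used below (see VNMSUBFP) to negate a fused multiply-subtract result by
+// subtracting it from negative zero.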
+def V_immneg0 : PatLeaf<(build_vector), [{
+ return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStGeneral /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fsub V_immneg0,
+ (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
+ (vnot_ppc VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
+ VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
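+// Instructions built from VCMPo (the dot-suffixed mnemonics below) are the
+// record forms: besides producing the vector result they also set CR6 from
+// the outcome of the comparison (Defs = [CR6], RC = 1).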
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
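+// V_SET0 materializes the all-zeros vector by xor'ing a register with itself;
+// VXForm_setzero ties all three register operands to the same register.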
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+ (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Raphson (used by divide, sqrt).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+ (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+ (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+ (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..b424d1101416
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  The
+/// reference uses the abstract FrameIndex as its base register until it is
+/// resolved, and an additional constant offset can be specified as well.
+///
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
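+// A minimal usage sketch (illustrative only; MBB, MI, DL, TII, DestReg and
+// FrameIdx are assumed to come from the caller):
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::LWZ), DestReg),
+//                     FrameIdx);
+//
+// With the default mem = true this appends the zero displacement first and
+// then the frame index, matching the "reg, imm(base)" operand order of the
+// PPC fixed-displacement loads and stores.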
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..84a15b1ca942
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,907 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ string cstr, InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> RT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+ let Constraints = cstr;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = RT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 000000000000..143444fdc22b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,655 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCHazardRecognizers.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+}
+
+using namespace llvm;
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*TII);
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
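Editorial note: the comment block above rewrites a zero-rotate rlwimi by swapping its two source operands and replacing mask(MB,ME) with mask((ME+1)&31,(MB-1)&31), i.e. the complement of the original mask. The standalone sketch below checks that identity with plain 32-bit arithmetic; it is an illustration only, not part of the patch, and mask() is a hypothetical helper modelling PowerPC's wrap-around masks in big-endian bit numbering, not an LLVM API.

// Illustration only -- not part of the patch.  Verifies the operand/mask swap
// used by PPCInstrInfo::commuteInstruction for rlwimi with a zero rotate.
#include <cassert>
#include <cstdint>
#include <cstdio>

// PowerPC-style mask: set bits MB..ME in big-endian numbering (bit 0 = MSB),
// wrapping around when MB > ME.  Hypothetical helper for this sketch.
static uint32_t mask(unsigned MB, unsigned ME) {
  uint32_t M = 0;
  for (unsigned i = MB; ; i = (i + 1) & 31) {
    M |= 0x80000000u >> i;                 // big-endian bit i
    if (i == ME) break;
  }
  return M;
}

int main() {
  for (unsigned MB = 0; MB < 32; ++MB)
    for (unsigned ME = 0; ME < 32; ++ME) {
      if (((ME + 1) & 31) == MB) continue; // all-ones mask has no complement form
      uint32_t Op1 = 0x12345678u, Op2 = 0x9ABCDEF0u;
      uint32_t M  = mask(MB, ME);
      uint32_t M2 = mask((ME + 1) & 31, (MB - 1) & 31);
      assert(M2 == ~M);                    // swapped mask is the complement
      assert(((Op1 & ~M) | (Op2 & M)) == ((Op2 & ~M2) | (Op1 & M2)));
    }
  puts("rlwimi commute identity holds");
}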
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC::BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC::Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL;
+ if (RC == PPC::GPRCRegisterClass) {
+ if (SrcReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (SrcReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(PPC::X11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ // FIXME (64-bit): Enable
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big that the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ .addReg(SrcReg, getKillRegState(isKill)));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are
+ // in CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(ScratchReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+ // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+ // backend currently only uses CR1EQ as an individual bit, this should
+ // not cause any bug. If we need other uses of CR bits, the following
+ // code may be invalid.
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We don't have indexed addressing for vector stores. Emit:
+ // R0 = ADDI FI#
+ // STVX VAL, 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
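Editorial note: the CR spill path above copies the whole condition register into a GPR with MFCR and, for any field other than CR0, rotates left by 4*(field number) so the saved field lands in CR0's slot; the reload path later rotates by 32 minus that amount to put it back. The sketch below only illustrates that bit movement under the layout the comments assume (CR0 in the most-significant nibble of the MFCR result); rotl32 and crField are hypothetical helpers, not LLVM or PowerPC APIs.

// Illustration only -- not part of the patch.  Shows why the spill code
// rotates by 4*N to move field CRn into CR0's slot, and by 32-4*N to move
// it back (cf. the RLWINM lines above and in LoadRegFromStackSlot below).
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t rotl32(uint32_t V, unsigned N) {   // hypothetical helper
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Extract 4-bit field CRn, assuming CR0 occupies the most-significant nibble.
static uint32_t crField(uint32_t CR, unsigned N) {
  return (CR >> (28 - 4 * N)) & 0xF;
}

int main() {
  uint32_t CR = 0x12345678u;                       // arbitrary CR image
  for (unsigned N = 0; N < 8; ++N) {
    uint32_t Spilled = rotl32(CR, 4 * N);          // rlwinm scratch, scratch, 4*N, 0, 31
    assert(crField(Spilled, 0) == crField(CR, N)); // CRn now sits in CR0's slot
    uint32_t Reloaded = rotl32(Spilled, 32 - 4 * N); // rlwinm ..., 32-4*N, 0, 31
    assert(crField(Reloaded, N) == crField(CR, N));  // and is back in place
  }
  puts("CR field rotate round-trips");
}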
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (RC == PPC::GPRCRegisterClass) {
+ if (DestReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (DestReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big that the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ // rlwinm scratch, scratch, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+
+ unsigned Reg = 0;
+ if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+ DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+ DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+ DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+ DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+ DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+ DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+ DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+ DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We don't have indexed addressing for vector loads. Emit:
+ // R0 = ADDI FI#
+ // Dest = LVX 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+MachineInstr*
+PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
+ addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may occupy. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ case PPC::PROLOG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ case PPC::DBG_VALUE:
+ return 0;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 000000000000..90bacc96c87e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,149 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
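Editorial note: the PPCII enums above pack three dispatch-group bits and a 3-bit execution-unit code into each instruction's target-specific flags, with the unit values stored pre-shifted by PPC970_Shift. The snippet below is a small, self-contained sketch of how such a flag word could be composed and decoded using the constants exactly as declared here; the TSFlags name and the sample instruction are illustrative assumptions, not taken from the patch.

// Illustration only -- not part of the patch.  Mirrors the PPCII flag layout
// declared above: three group-formation bits plus a pre-shifted 3-bit unit.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum {
  PPC970_First   = 0x1,
  PPC970_Single  = 0x2,
  PPC970_Cracked = 0x4,
  PPC970_Shift   = 3,
  PPC970_Mask    = 0x07 << PPC970_Shift
};
enum PPC970_Unit {                  // subset of the unit codes listed above
  PPC970_Pseudo = 0 << PPC970_Shift,
  PPC970_FXU    = 1 << PPC970_Shift,
  PPC970_LSU    = 2 << PPC970_Shift
};

int main() {
  // Hypothetical flag word for a cracked load/store-unit instruction.
  uint64_t TSFlags = PPC970_Cracked | PPC970_LSU;

  bool Cracked  = (TSFlags & PPC970_Cracked) != 0;  // group-formation bit
  unsigned Unit = TSFlags & PPC970_Mask;            // unit code, already shifted
  assert(Cracked && Unit == PPC970_LSU);
  printf("cracked=%d unit=%u\n", (int)Cracked, Unit >> PPC970_Shift);
}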
+
+
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may occupy. This returns the maximum number of bytes.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 000000000000..773578c6ea81
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1476 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// This sequence is used for long double->int conversions. It changes the
+// bits in the FPSCR, which is not modelled.
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPOutGlue]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+ [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+ [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+ SDTCisVT<3, f64>]>,
+ [SDNPInGlue]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: get the high 16 bits, adjusted for the sign of the low 16 bits.
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sign-extended correctly.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
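Editorial note: HA16, LO16 and the imm16Shifted* leaves above implement the usual addis/addi split of a 32-bit constant: because addi sign-extends its 16-bit immediate, the high half must be adjusted for the sign of the low half, which is what (Val - (signed short)Val) >> 16 computes. The standalone check below verifies that round trip; ha16/lo16 are hypothetical stand-ins for the SDNodeXForms, not LLVM APIs.

// Illustration only -- not part of the patch.  Checks that the HA16/LO16
// split above reassembles the original constant,
//   V == (ha16(V) << 16) + lo16(V),
// when the low half is treated as a signed 16-bit immediate (as addi does).
#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t lo16(uint32_t V) { return (int16_t)V; }       // LO16, sign-extended by addi
static int32_t ha16(uint32_t V) {                            // HA16, as in the XForm above
  return (int32_t)(V - (uint32_t)(int16_t)V) >> 16;
}

int main() {
  const uint32_t Samples[] = {0x00000000u, 0x00007FFFu, 0x00008000u,
                              0x12348000u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t V : Samples) {
    uint32_t Rebuilt = ((uint32_t)ha16(V) << 16) + (uint32_t)lo16(V);  // addis + addi
    assert(Rebuilt == V);
  }
  puts("HA16/LO16 round-trip holds");
}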
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
+
+// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+ "", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_Darwin : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Darwin : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+ }
+}
+
+// SVR4 ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_SVR4 : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_SVR4 : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_SVR4 (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ }
+}
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+ [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+ [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfs $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfd $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2 in {
+def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1a<53, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1a<55, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+}
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+}
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+}
+
+let isReMaterializable = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CARRY] in {
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+}
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ }
+}
+
+/// Note that FMR is defined as a pseudo-op on the PPC970 because it is often
+/// coalesced away, and we don't want the dispatch group builder to think that
+/// it will fill a slot (which could cause the load of an LSU reject to sneak
+/// into a d-group with a store).
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. Condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0,
+ [(PPCmtfsb0 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0,
+ [(PPCmtfsb1 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ // MTFSF does not actually produce an FP result. We pretend it copies
+ // input reg B to the output. If we didn't do this it would look like the
+ // instruction had no outputs (because we aren't modelling the FPSCR) and
+ // it would be deleted.
+ def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+ (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+ "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+ [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+ F8RC:$rT, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set F8RC:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def FADDrtz: AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+}
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+let Defs = [CARRY] in {
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4- and 8-byte forms for the result. To avoid
+// needing four variants, the comparison operand is forced to always be an
+// 8-byte double (code should use an FMRSD if the input comparison value really
+// wants to be a float); only the result and select operands get 4/8 byte forms.
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
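+// For example, the 32-bit constant 0x12345678 is materialized as (register
+// chosen arbitrarily for illustration):
+//   lis r3, 0x1234        // r3 = 0x12340000
+//   ori r3, r3, 0x5678    // r3 = 0x12345678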
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
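+// Note that the add expansion above uses HA16 (the "high-adjusted" half, which
+// compensates for ADDI sign-extending its 16-bit immediate), while the or/xor
+// expansions can use the plain HI16 because ORI/ORIS and XORI/XORIS treat
+// their immediates as zero-extended bit patterns.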
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
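+// Assuming SHL32 and SRL32 are the usual 31-n and 32-n transforms defined
+// earlier in this file, shl-by-n becomes "rlwinm SH=n, MB=0, ME=31-n" and
+// srl-by-n becomes "rlwinm SH=32-n, MB=n, ME=31": a rotate combined with a
+// mask that clears the bits shifted out.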
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
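+// MB and ME extract the begin/end bit positions of the contiguous mask
+// immediate, so a rotate followed by an AND with such a mask folds into a
+// single rlwnm.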
+
+// Calls
+def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
+ (BL_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
+ (BL_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
+ (BL_SVR4 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
+ (BL_SVR4 texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS GPRC:$in, tblockaddress:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
+ (SYNC)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 000000000000..4590f0045641
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,468 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
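+// The BUILD_* macros below assemble raw 32-bit PowerPC instruction words from
+// their opcode and operand fields; EmitBranchToAt uses them to patch stubs and
+// call sites at runtime.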
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
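+// EmitBranchToAt - Overwrite the code at 'At' with a branch (or call) to 'To'.
+// A single b/bl suffices when the target is within the +/-32MB direct-branch
+// range; otherwise the address is materialized in r12 and reached through
+// mtctr/bctr (4 instructions on 32-bit, 7 on 64-bit).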
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+#endif
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
+ // Save all int arg registers
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl PPCCompilationCallbackC\n"
+ "nop\n"
+#else
+ "bl _PPCCompilationCallbackC\n"
+#endif
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
+ // Restore all FP arg registers
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a power pc, you can't execute this!");
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
+
+ // Return the target address; the compilation callback stub above moves it
+ // into CTR and branches to it after restoring all of the saved registers.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
+ }
+
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
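+ // Reserve room for the worst-case (7-instruction) branch sequence; it is
+ // filled in by EmitBranchToAt below.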
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: llvm_unreachable("Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
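+ // For example, a final value of 0x10008000 must be split as hi = 0x1001 and
+ // lo = 0x8000, since the sign-extended low half contributes -0x8000.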
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 000000000000..47ead59b587d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,49 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual StubLayout getStubLayout();
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
new file mode 100644
index 000000000000..cf73d861fa4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -0,0 +1,194 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // Output the constant in big endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)(Bits >> 24);
+ Bits <<= 8;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(MCII, STI, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the ha16 half of the symbol.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_ha16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the lo16 half of the symbol.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo14));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
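+ // The FXM field is an 8-bit mask whose most-significant bit selects CR0, so
+ // CRn is encoded as 0x80 >> n.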
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 000000000000..33af4269a3ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,173 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ MCContext &Ctx = AP.OutContext;
+
+ SmallString<128> Name;
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Name += AP.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+ (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+ isImplicitlyPrivate = true;
+
+ AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+ // If the target flags on the operand change the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ if (MO.getTargetFlags() & PPCII::MO_LO16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ else if (MO.getTargetFlags() & PPCII::MO_HA16)
+ RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16;
+
+ // FIXME: This isn't right, but we don't have a good way to express this in
+ // the MC Level, see below.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+ RefKind = MCSymbolRefExpr::VK_None;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+ // since it is not a symbol!
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
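A minimal sketch of the expected call site for the entry point added above, assuming the PPCAsmPrinter::EmitInstruction override and its OutStreamer and Subtarget members found elsewhere in this tree (none of which are part of this file):

// Sketch only -- the surrounding class and members are assumed, not added here.
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  // Lower the MachineInstr to an MCInst, selecting Darwin- or GAS-flavored
  // lo16/ha16 symbol variants as GetSymbolRef above requires.
  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
  OutStreamer.EmitInstruction(TmpInst);
}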
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 000000000000..e2649c8b380f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,132 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and holds
+/// private PowerPC target-specific information for each MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// LRStoreRequired - Indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call? Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ SpillsCR(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+};
+
+} // end of namespace llvm
+
+
+#endif
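Passes and frame lowering code reach this per-function state through MachineFunction::getInfo<PPCFunctionInfo>(). A minimal sketch of how the accessors above are typically used, with recordFramePointerSpill being a hypothetical helper that is not part of this header:

#include "PPCMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;

// Hypothetical helper, shown only to illustrate the accessors declared above.
static void recordFramePointerSpill(MachineFunction &MF, int FPSaveIndex) {
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  FI->setFramePointerSaveIndex(FPSaveIndex);  // remember the FP spill slot
  if (MF.getFrameInfo()->hasCalls())          // calls clobber LR on PowerPC
    FI->setMustSaveLR(true);
}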
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 000000000000..3164e33faae9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
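The 6561 entries are 9^4 shuffle signatures: each of the four result lanes can name one of the four LHS lanes (0-3), one of the four RHS lanes (4-7), or be undef ('u'), i.e. nine choices per lane, with the last lane varying fastest in the table that follows. Below is a minimal sketch of the lookup, with the base-9 index encoding inferred from the entry comments; lookupPerfectShuffle is a hypothetical helper, and the packed per-entry layout of cost and lowering recipe is left to the consuming code rather than restated here.

// Sketch only: PFIndexes[i] is in [0,8]; 0-3 = LHS lane, 4-7 = RHS lane,
// 8 = undef ('u').  Returns the packed entry (cost plus lowering recipe).
static unsigned lookupPerfectShuffle(const unsigned PFIndexes[4]) {
  unsigned PFTableIndex = PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 +
                          PFIndexes[2]*9 + PFIndexes[3];
  return PerfectShuffleTable[PFTableIndex];
}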
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp
new file mode 100644
index 000000000000..12bb0a143406
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp
@@ -0,0 +1,31 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 000000000000..b2c831579f79
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
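The enum above packs each predicate as "(BI << 5) | BO": the condition-register bit to test (BI) in the upper bits and the branch-options field (BO) in the low five bits. A minimal standalone sketch of unpacking that encoding, with illustrative helper names that are not part of this patch:

    #include <cassert>

    // Assumed helpers mirroring the "(BI << 5) | BO" packing documented above.
    static unsigned predicateBO(unsigned Pred) { return Pred & 0x1F; } // low 5 bits
    static unsigned predicateBI(unsigned Pred) { return Pred >> 5; }   // CR bit index

    int main() {
      const unsigned EQ = (2 << 5) | 12;  // same value as PRED_EQ in the enum
      assert(predicateBO(EQ) == 12);      // BO 12: branch if the CR bit is set
      assert(predicateBI(EQ) == 2);       // BI 2: CR0's EQ bit
      return 0;
    }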
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..9c2428b92e65
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,714 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+// FIXME (64-bit): Eventually enable by default.
+namespace llvm {
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+}
+
+using namespace llvm;
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+ return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
+ (EnablePPC64RS && Subtarget.isPPC64()));
+}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
+ }
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const unsigned*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Darwin32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+
+ // 32-bit SVR4 calling convention.
+ static const unsigned SVR4_CalleeSavedRegs[] = {
+ PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Darwin64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR8, 0
+ };
+
+ // 64-bit SVR4 calling convention.
+ static const unsigned SVR4_64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
+
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
+ Darwin32_CalleeSavedRegs;
+
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::R2);
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overly conservative, as it also prevents allocation of R31
+ // when the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+
+ if (!requiresRegisterScavenging(MF))
+ Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+ Reserved.set(PPC::X0);
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+ Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::X2);
+ }
+ }
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
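When the callee-popped amount does not fit in a signed 16-bit immediate, the else-branch above materializes it with the usual lis/ori pair before adding it to the stack pointer. A small self-contained check of how the two halves recombine; plain C++ with an assumed helper name, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Mirrors the split above: LIS gets CalleeAmt >> 16, ORI ORs in
    // CalleeAmt & 0xFFFF; reassembling the halves yields the original value.
    static int32_t rebuild(int32_t CalleeAmt) {
      int32_t Hi = CalleeAmt >> 16;      // immediate handed to LIS/LIS8
      uint32_t Lo = CalleeAmt & 0xFFFF;  // immediate handed to ORI/ORI8
      return static_cast<int32_t>((static_cast<uint32_t>(Hi) << 16) | Lo);
    }

    int main() {
      assert(rebuild(0x00123456) == 0x00123456);
      assert(rebuild(-700000) == -700000);  // an amount that needs this path
      return 0;
    }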
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+ const TargetRegisterClass *RC, int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ // FIXME: move ARM callee-saved reg scan to target independent code, then
+ // search for already spilled CS register here.
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+///   addi R0, SP, \#frameSize          ; get the address of the previous frame
+///   stwux R0, SP, Rnegsize            ; add and update the SP with the negated size
+///   addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
+
+ // FIXME (64-bit): Use "findScratchRegister"
+ unsigned Reg;
+ if (requiresRegisterScavenging(MF))
+ Reg = findScratchRegister(II, RS, RC, SPAdj);
+ else
+ Reg = PPC::R0;
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
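A brief sketch of the choice made above for the "previous frame address" step: frames that fit a signed 16-bit immediate and need no extra alignment use an addi off R31, while everything else reloads the back chain word from 0(SP). The helper below is hypothetical and only restates that condition:

    #include <cassert>
    #include <cstdint>

    // Hypothetical predicate mirroring the branch above: true when
    // "addi Reg, R31, FrameSize" suffices.
    static bool canUseAddiForPrevFrame(uint64_t FrameSize, unsigned MaxAlign,
                                       unsigned TargetAlign) {
      return MaxAlign < TargetAlign && FrameSize <= 32767;  // isInt<16>(FrameSize)
    }

    int main() {
      assert(canUseAddiForPrevFrame(1024, 8, 16));    // small frame: addi path
      assert(!canUseAddiForPrevFrame(40000, 8, 16));  // large frame: load from 0(SP)
      return 0;
    }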
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ bool LP64 = Subtarget.isPPC64();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0)
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
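The rlwinm above rotates by 4 * getRegisterNumbering(SrcReg) with a full 0..31 mask, i.e. a plain rotate-left that moves the spilled CR field into CR0's position (the top nibble of the mfcr image). A standalone sketch of that arithmetic, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Rotate-left, matching rlwinm rA, rA, N, 0, 31 (the mask keeps all 32 bits).
    static uint32_t rotl32(uint32_t V, unsigned N) {
      N &= 31;
      return N ? (V << N) | (V >> (32 - N)) : V;
    }

    int main() {
      // In the mfcr image, CR field n occupies bits (31-4n)..(28-4n) counting
      // from the LSB, so CR0 is the top nibble. Pretend CR6 holds 0b1010.
      const unsigned CRNum = 6;
      uint32_t CRWord = 0xAu << (28 - 4 * CRNum);    // CR6's nibble
      uint32_t Rotated = rotl32(CRWord, 4 * CRNum);  // shift amount used above
      assert((Rotated >> 28) == 0xA);                // now sits in CR0's slot
      return 0;
    }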
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Find out which operand is the frame index.
+ unsigned FIOperandNo = 0;
+ while (!MI.getOperand(FIOperandNo).isFI()) {
+ ++FIOperandNo;
+ assert(FIOperandNo != MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNo-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II, SPAdj, RS);
+ return;
+ }
+
+ // Special case for pseudo-op SPILL_CR.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ PPC::R31 : PPC::R1,
+ false);
+
+ // Figure out if the offset in the instruction is shifted right two bits. This
+ // is true for instructions like "STD", for which the machine implicitly
+ // appends two low zero bits.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into the instruction's immediate field, so scavenge a
+ // register to build the offset in.
+ // FIXME: figure out what SPAdj is doing here.
+
+ // FIXME (64-bit): Use "findScratchRegister".
+ unsigned SReg;
+ if (requiresRegisterScavenging(MF))
+ SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
+ else
+ SReg = PPC::R0;
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ .addReg(SReg, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA, 1:rB, 2:imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+}
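For reference, the legality test above distinguishes ordinary D-form ("ri") displacements from DS-form ("ix") instructions such as LD/STD, whose encoding drops the low two bits of the offset. A minimal sketch with an assumed helper name, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // True when the frame offset folds directly into the instruction, mirroring
    // isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0) above.
    static bool offsetEncodesDirectly(int64_t Offset, bool IsIXAddr) {
      bool Fits16 = Offset >= -32768 && Offset <= 32767;
      return Fits16 && (!IsIXAddr || (Offset & 3) == 0);
    }

    int main() {
      assert(offsetEncodesDirectly(-8, true));      // STD at -8: stored as -8 >> 2 = -2
      assert(!offsetEncodesDirectly(6, true));      // misaligned DS offset
      assert(offsetEncodesDirectly(6, false));      // plain D-form takes any 16-bit value
      assert(!offsetEncodesDirectly(70000, false)); // too large to encode directly
      return 0;
    }

When the test fails, the code builds the offset in a scavenged register with lis/ori and rewrites the instruction to its indexed form via ImmToIdxMap, as shown above.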
+
+unsigned PPCRegisterInfo::getRARegister() const {
+ return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
+
+/// DWARFFlavour - Flavour of DWARF register numbers
+///
+namespace DWARFFlavour {
+ enum {
+ PPC64 = 0, PPC32 = 1
+ };
+}
+
+int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably DWARF numbers differ for Linux and Darwin
+ unsigned Flavour = Subtarget.isPPC64() ?
+ DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
+
+ return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour);
+}
+
+int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably DWARF numbers differ for Linux and Darwin
+ unsigned Flavour = Subtarget.isPPC64() ?
+ DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
+
+ return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 000000000000..33fe5ebcf4cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,78 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include <map>
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// PPC::F14, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - We require a register scavenger.
+ /// FIXME (64-bit): Should be inlined.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 000000000000..1acdf4eb853b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,326 @@
+//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ field bits<3> Num = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
+def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
+def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
+def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
+def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
+def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
+def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
+def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
+def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
+def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
+def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
+def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
+def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
+def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
+def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
+def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
+def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
+def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
+def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
+def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
+def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
+def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
+def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
+def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
+def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
+def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
+def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
+def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
+def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
+def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
+def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
+def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
+def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
+def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
+def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
+def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
+def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
+def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
+def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
+def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
+def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
+def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
+def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
+def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
+def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
+def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
+def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
+def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
+def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
+def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
+def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
+def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
+def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
+def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
+def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
+def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
+def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
+def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
+def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
+def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
+def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
+def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
+def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
+def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
+def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
+def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
+def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
+def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
+def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
+def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
+def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
+def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
+def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
+def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
+def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
+def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
+def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
+def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
+def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
+def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
+def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
+def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
+def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
+def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
+def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
+def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
+def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
+def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
+def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
+def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
+def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
+def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
+def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
+def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
+def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
+def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
+def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
+def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
+def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
+def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
+def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
+def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
+def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
+def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
+def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
+def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
+def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers; it has to be done in code. To make this work, all
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
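+// (Illustrative only: in TableGen terms this amounts to tagging such
+// instructions with something like "let Uses = [RM]" or "let Defs = [RM]",
+// so the rounding-mode writers stay live because the calls and returns
+// read RM.)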
+// Also, in the architecture it is not really a SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first,
+// then non-volatiles in reverse order since stmw/lmw save from rN to r31.
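+// For example, saving r29, r30 and r31 takes a single "stmw r29, ..." rather
+// than three separate stores, so handing out the non-volatiles downward from
+// r30 keeps the used registers packed up against r31.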
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, LR)>;
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, LR8)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
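+// For example, if F14 is the lowest-numbered non-volatile that gets allocated,
+// it is spilled to offset 8 * (32 - 14) = 144 below the back chain word, and
+// the resulting 144-byte save area is completely filled by F14 through F31.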
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN)>
+{
+ let CopyCost = -1;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
+ let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
+}
+
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 000000000000..a33e7e03370c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+      // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+      // instructions, which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
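+
+    // Illustrative note: materializing a 32-bit global address typically
+    // pairs an addis fixed up with reloc_absolute_high with an addi (or a
+    // load/store) fixed up with reloc_absolute_low, so the two 16-bit halves
+    // of the address are patched in separately at emission time.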
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 000000000000..9664f1457171
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,505 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chip sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // special register unit
+def IU1 : FuncUnit; // integer unit 1 (simple)
+def IU2 : FuncUnit; // integer unit 2 (complex)
+def FPU1 : FuncUnit; // floating point unit 1
+def FPU2 : FuncUnit; // floating point unit 2
+def VPU : FuncUnit; // vector permutation unit
+def VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def VFPU : FuncUnit; // vector floating point unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When adding new opcodes to the
+// supported set, refer to the following table to determine which itinerary
+// class the opcode belongs to.
+//
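+// For example, a new rotate-with-mask-insert style opcode would be listed
+// here under IntRotate, and it then picks up a per-processor latency from the
+// InstrItinData<IntRotate, ...> entries in the included PPCScheduleG*.td
+// files.
+//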
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 000000000000..ad4da1fe224f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,64 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
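+// Each InstrItinData entry below binds one itinerary class to a single
+// InstrStage; for example, InstrStage<19, [IU1]> means the instruction
+// occupies integer unit 1 for 19 cycles (the divw latency used here).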
+def G3Itineraries : ProcessorItineraries<
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 000000000000..03c3b29cc101
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,74 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 000000000000..00cac3c7cab2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,80 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+
+def G4PlusItineraries : ProcessorItineraries<
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 000000000000..1671f22b30ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,84 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..d4258b4a0eb1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+#include "PPCTargetMachine.h"
+using namespace llvm;
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 000000000000..341b69cdfb5f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSELECTIONDAGINFO_H
+#define POWERPCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCTargetMachine;
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 000000000000..5ea9b0f6596c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,141 @@
+//===- PPCSubtarget.cpp - PPC Subtarget Information ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCCPU - Returns the name of the current CPU.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PPCGenSubtargetInfo(TT, CPU, FS)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
+ , TargetTriple(TT) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+#if defined(__APPLE__)
+ if (CPUName == "generic")
+ CPUName = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+  // JIT mode doesn't want lazy resolver stubs; it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..e028de6b09de
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,151 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+  /// StackAlignment - The minimum alignment known to hold for the stack frame
+  /// on entry to the function, and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+  /// Used by the ISel to turn on optimizations for POWER4-derived architectures.
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+  /// stack frame on entry to the function, and which must be maintained by
+  /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+  /// getInstrItineraryData - Return the instruction itineraries based on the
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
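+    // As a rough guide to these strings: "E" marks big-endian, "p:64:64" a
+    // 64-bit pointer with 64-bit alignment, entries such as "f64:64:64" a
+    // type's ABI and preferred alignments, and "n32"/"n32:64" the native
+    // integer widths.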
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+  /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..e0ea5adba751
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,135 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+
+ return NULL;
+}
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
+}
+
+
+PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameLowering(Subtarget), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isDarwin())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, CPU, FS, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : PPCTargetMachine(T, TT, CPU, FS, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
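+    // (Roughly five instructions per arbitrary 64-bit absolute address when
+    // built from immediates: lis/ori for the high half, a shift, then
+    // oris/ori for the low half; PIC code can load the address from memory
+    // instead.)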
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..baf07e3498f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,94 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameLowering FrameLowering;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+
+public:
+ PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const PPCTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+ virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 000000000000..ad607d0ade6a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 000000000000..dab35e5e4e6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,323 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
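+//
+// For example, an arithmetic instruction that immediately precedes a branch
+// and has no dependence on it can usually be moved into the branch's delay
+// slot; when no safe candidate is found, the slot is filled with a NOP.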
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-sparc-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the Sparc delay slot filler."),
+ cl::Hidden);
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ MachineBasicBlock::iterator
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+ bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// We assume there is only one delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
+ MachineBasicBlock::iterator J = I;
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB, I);
+
+ ++FilledSlots;
+ Changed = true;
+
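+ // If no suitable instruction was found (or the filler is disabled),
+ // fill the delay slot with a NOP; otherwise move the chosen
+ // instruction into it.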
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
+ unsigned structSize = 0;
+ if (needsUnimp(I, structSize)) {
+ MachineBasicBlock::iterator J = I;
+ ++J; // Skip the delay-slot filler.
+ BuildMI(MBB, ++J, I->getDebugLoc(),
+ TII->get(SP::UNIMP)).addImm(structSize);
+ }
+ }
+ return Changed;
+}
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot)
+{
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ MachineBasicBlock::iterator I = slot;
+
+ if (slot->getOpcode() == SP::RET)
+ return MBB.end();
+
+ if (slot->getOpcode() == SP::RETL) {
+ --I;
+ if (I->getOpcode() != SP::RESTORErr)
+ return MBB.end();
+ // Change the RETL into a RET; the RESTORE becomes the delay-slot filler.
+ slot->setDesc(TII->get(SP::RET));
+ return I;
+ }
+
+ // A call's delay-slot filler may define some of the call's register uses.
+ if (slot->getDesc().isCall())
+ insertCallUses(slot, RegUses);
+ else
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool done = false;
+
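+ // Scan backwards from the delay-slotted instruction, looking for an
+ // instruction that can safely be moved into the slot.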
+ while (!done) {
+ done = (I == MBB.begin());
+
+ if (!done)
+ --I;
+
+ // Skip debug values.
+ if (I->isDebugValue())
+ continue;
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || I->getDesc().hasDelaySlot()
+ || isDelayFiller(MBB, I))
+ break;
+
+ if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(I, RegDefs, RegUses);
+ continue;
+ }
+
+ return I;
+ }
+ return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses)
+{
+
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
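+ // Be conservative about memory: a candidate load may not move past a
+ // store between it and the slot, and a candidate store may not move
+ // past any other memory operation.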
+ if (candidate->getDesc().mayLoad()) {
+ sawLoad = true;
+ if (sawStore)
+ return true;
+ }
+
+ if (candidate->getDesc().mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ if (!MO.isReg())
+ continue; // skip
+
+ unsigned Reg = MO.getReg();
+
+ if (MO.isDef()) {
+ // Check whether Reg is defined or used before the delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ // Check whether Reg is defined before the delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses)
+{
+
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown opcode.");
+ case SP::CALL: break;
+ case SP::JMPLrr:
+ case SP::JMPLri:
+ assert(MI->getNumOperands() >= 2);
+ const MachineOperand &Reg = MI->getOperand(0);
+ assert(Reg.isReg() && "JMPL first operand is not a register.");
+ assert(Reg.isUse() && "JMPL first operand is not a use.");
+ RegUses.insert(Reg.getReg());
+
+ const MachineOperand &RegOrImm = MI->getOperand(1);
+ if (RegOrImm.isImm())
+ break;
+ assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+ assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+ RegUses.insert(RegOrImm.getReg());
+ break;
+ }
+}
+
+ // Insert the defs and uses of MI into the RegDefs and RegUses sets.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses)
+{
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ if (MO.isUse())
+ RegUses.insert(Reg);
+
+ }
+}
+
+ // Return true if Reg or any of its aliases is in RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+ if (RegSet.count(Reg))
+ return true;
+ // Check aliased registers.
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++ Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
+
+ // Return true if the candidate is a delay-slot filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate)
+{
+ if (candidate == MBB.begin())
+ return false;
+ if (candidate->getOpcode() == SP::UNIMP)
+ return true;
+ const MCInstrDesc &prevdesc = (--candidate)->getDesc();
+ return prevdesc.hasDelaySlot();
+}
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+ if (!I->getDesc().isCall())
+ return false;
+
+ unsigned structSizeOpNum = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unknown call opcode.");
+ case SP::CALL: structSizeOpNum = 1; break;
+ case SP::JMPLrr:
+ case SP::JMPLri: structSizeOpNum = 2; break;
+ }
+
+ const MachineOperand &MO = I->getOperand(structSizeOpNum);
+ if (!MO.isImm())
+ return false;
+ StructSize = MO.getImm();
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/FPMover.cpp b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 000000000000..1423b1e64d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,141 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ llvm_unreachable("Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
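+ // Rewrite the double-precision pseudo into its single-precision
+ // counterpart operating on the even halves of the register pair; a
+ // plain FMOVS for the odd halves is inserted below when needed.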
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ llvm_unreachable("Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..1e8c02979887
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMSparcDesc
+ SparcMCTargetDesc.cpp
+ SparcMCAsmInfo.cpp
+ )
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..abcbe2da18ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
new file mode 100644
index 000000000000..6a7e0902354e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::sparcv9)
+ PointerSize = 8;
+
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ PrivateGlobalPrefix = ".L";
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
new file mode 100644
index 000000000000..0cb6827d2771
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ struct SparcELFMCAsmInfo : public MCAsmInfo {
+ explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 000000000000..cb92a2bfd417
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSparcMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSparcMCAsmInfo() {
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 000000000000..2fd9e3f4cbd3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
new file mode 100644
index 000000000000..7b2c6141dbf8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,109 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+
+} // end namespace llvm
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Greater or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
new file mode 100644
index 000000000000..764336665d0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,72 @@
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 000000000000..edde8427aa89
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,251 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SparcAsmPrinter : public AsmPrinter {
+ public:
+ explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+
+ virtual void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1, O);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1, O);
+ }
+}
+
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
+ raw_ostream &O) {
+ std::string operand = "";
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ default: assert(0 && "Operand is not a register ");
+ case MachineOperand::MO_Register:
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Operand is not a physical register ");
+ assert(MO.getReg() != SP::O7 &&
+ "%o7 is assigned as destination for getpcx!");
+ operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ break;
+ }
+
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
+ unsigned bbNum = MI->getParent()->getNumber();
+
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
+
+ O << "\t sethi\t"
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n' ;
+
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
+ O << "\tor\t" << operand
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n';
+ O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
+ return true;
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+ RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 000000000000..856f87ad1d37
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,36 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ // Custom-assign SRet to [%sp+64].
+ CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+ // i32 and f32 arguments are passed in integer registers if there is space.
+ CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // f64 arguments are split and passed through registers or through stack.
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
+ // Otherwise, arguments are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 000000000000..320c8ca26d7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 000000000000..9a2ddc83f5aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+ const SparcSubtarget &STI;
+public:
+ explicit SparcFrameLowering(const SparcSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 000000000000..8c6103dd8a39
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,212 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+ SparcTargetMachine& TM;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
+ : SelectionDAGISel(tm),
+ Subtarget(tm.getSubtarget<SparcSubtarget>()),
+ TM(tm) {
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+
+private:
+ SDNode* getGlobalBaseReg();
+};
+} // end anonymous namespace
+
+SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
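+/// SelectADDRri - Match an address in the reg+simm13 form used by SPARC
+/// loads and stores: a frame index, a base register plus a 13-bit signed
+/// immediate, or a base register plus the %lo() part of an address.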
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (isInt<13>(CN->getSExtValue())) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
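+/// SelectADDRrr - Match a reg+reg address. Addresses that fit the reg+imm
+/// form are rejected here so that SelectADDRri can handle them.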
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case SPISD::GLOBAL_BASE_REG:
+ return getGlobalBaseReg();
+
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 000000000000..6f30d3fd6c35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1273 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ assert (ArgFlags.isSRet());
+
+ // Assign the SRet argument.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ 0,
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const unsigned RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ } else {
+ // Assign the whole value on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8,4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 6))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4,4),
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "SparcGenCallingConv.inc"
+
+SDValue
+SparcTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (MF.getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ unsigned RetAddrOffset = 8; // Call Inst + Delay Slot
+ // If the function returns a struct, copy the SRetReturnReg to I0
+ if (MF.getFunction()->hasStructRetAttr()) {
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ Flag = Chain.getValue(1);
+ if (MF.getRegInfo().liveout_empty())
+ MF.getRegInfo().addLiveOut(SP::I0);
+ RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
+ }
+
+ SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
+ if (Flag.getNode())
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode);
+}
+
+/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values. TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue
+SparcTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
+
+ const unsigned StackOffset = 92;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (i == 0 && Ins[i].Flags.isSRet()) {
+ // Get the SRet argument from [%fp+64].
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+ SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+ assert(i+1 < e);
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ SDValue LoVal;
+ if (NextVA.isMemLoc()) {
+ int FrameIdx = MF.getFrameInfo()->
+ CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ &SP::IntRegsRegClass);
+ LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
+ }
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ if (VA.getLocVT() == MVT::f32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+ else if (VA.getLocVT() != MVT::i32) {
+ Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+ DAG.getValueType(VA.getLocVT()));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+ }
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::f64);
+ // If it is double-word aligned, just load.
+ if (Offset % 8 == 0) {
+ int FI = MF.getFrameInfo()->CreateFixedObject(8,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false,false, 0);
+ InVals.push_back(Load);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset+4,
+ true);
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+ SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load;
+ if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+ Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ MachinePointerInfo(),
+ VA.getValVT(), false, false,0);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ }
+ InVals.push_back(Load);
+ }
+
+ if (MF.getFunction()->hasStructRetAttr()) {
+    // Copy the SRet argument to SRetReturnReg.
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+ SFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (isVarArg) {
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ if (NumAllocated == 6)
+ ArgOffset += StackOffset;
+ else {
+ assert(!ArgOffset);
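+      // Register arguments have home slots starting at %fp+68 (just past the
+      // 64-byte window save area and the 4-byte struct-return slot).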
+ ArgOffset = 68+4*NumAllocated;
+ }
+
+ // Remember the vararg offset for the va_start implementation.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset);
+
+ std::vector<SDValue> OutChains;
+
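+    // Spill any argument registers that were not claimed by fixed arguments
+    // into their reserved slots, so va_arg can read every variadic argument
+    // from memory.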
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += 4;
+ }
+
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Sparc target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  // Create local copies for byval args.
+ SmallVector<SDValue, 8> ByValArgs;
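+  // Each byval argument is copied into a fresh stack object via memcpy, and
+  // the address of that copy is what actually gets passed below.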
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ unsigned Align = Flags.getByValAlign();
+
+ int FI = MFI->CreateStackObject(Size, Align, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ false, //isVolatile,
+ (Size <= 32), //AlwaysInline if size <= 32
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ const unsigned StackOffset = 92;
+ bool hasStructRetAttr = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+    // Use the local copy if it is a byval arg.
+ if (Flags.isByVal())
+ Arg = ByValArgs[byvalArgIdx++];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (Flags.isSRet()) {
+ assert(VA.needsCustom());
+      // Store the SRet argument at [%sp+64].
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(64);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ hasStructRetAttr = true;
+ continue;
+ }
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
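+      // f64 arguments are passed as two i32 words. Unless the value goes to
+      // a naturally aligned stack slot, spill it to a temporary and reload
+      // the two halves so each can be placed independently.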
+
+ if (VA.isMemLoc()) {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        // If it is double-word aligned, just store.
+ if (Offset % 8 == 0) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ continue;
+ }
+ }
+
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Arg, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ assert(i+1 != e);
+ CCValAssign &NextVA = ArgLocs[++i];
+ if (NextVA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ } else {
+          // Store the low part on the stack.
+ unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ } else {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // Store the high part.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ // Store the low part.
+ PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ continue;
+ }
+
+    // Arguments that are passed in registers must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ if (VA.getLocVT() != MVT::f32) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+
+  // Emit all stores, making sure they occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
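+    // The calling convention hands out the callee's incoming %i registers;
+    // before the callee's SAVE shifts the register window, the caller sees
+    // those same locations as its %o registers.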
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+  unsigned SRetArgSize = hasStructRetAttr ? getSRetArgSize(DAG, Callee) : 0;
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // Returns a chain & a flag for retval copy to use
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
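+  // For struct-returning calls the SPARC ABI expects an UNIMP instruction
+  // after the call's delay slot whose immediate encodes the size of the
+  // returned struct, so the size is carried as an operand of the call node.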
+ if (hasStructRetAttr)
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ CalleeFn = M->getFunction(E->getSymbol());
+ }
+
+ if (!CalleeFn)
+ return 0;
+
+ assert(CalleeFn->hasStructRetAttr() &&
+ "Callee does not have the StructRet attribute.");
+
+ const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ const Type *ElementTy = Ty->getElementType();
+ return getTargetData()->getTypeAllocSize(ElementTy);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // SPARC has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ setMinFunctionAlignment(2);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+ case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ }
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+/// Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets. Op is expected to be a target-specific node.
+/// Used by the DAG combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
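+  // Materialize the address as a %hi/%lo pair (SETHI plus the low 10 bits).
+  // Under PIC the pair is an offset from the global base register, and the
+  // actual address is loaded from the slot it selects.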
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ getPointerTy());
+ SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ GlobalBase, RelAddr);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<EVT> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Glue);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, MachinePointerInfo(SV), false, false, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+  // The resultant pointer sits 96 bytes above the new stack pointer, past
+  // the reserved area (the 16-word register window spill area plus the
+  // struct-return and outgoing-argument slots, rounded up for 8-byte
+  // alignment).
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+ dl, MVT::Other, DAG.getEntryNode());
+ return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned FrameReg = SP::I6;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue FrameAddr;
+ if (depth == 0)
+ FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ else {
+    // Flush first so the windowed registers' values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
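+    // A frame pointer is the caller's %sp; the caller's own %fp (%i6) is
+    // spilled at offset 56 of that frame's 16-word window save area
+    // (%l0-%l7 at 0-28, %i0-%i7 at 32-60), so chase that slot 'depth' times.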
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ FrameAddr, DAG.getIntPtrConstant(56));
+ FrameAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned RetReg = SP::I7;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue RetAddr;
+ if (depth == 0)
+ RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+ else {
+    // Flush first so the windowed registers' values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
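+    // Walk up 'depth' frames through the saved %fp slots (offset 56 of each
+    // window save area) and read the saved return address (%i7) from offset
+    // 60 of the final frame.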
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ RetAddr,
+ DAG.getIntPtrConstant((i == depth-1)?60:56));
+ RetAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return RetAddr;
+}
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, SP::IntRegsRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 000000000000..8a1886a856e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,103 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG, // Global base reg for PIC
+ FLUSHW // FLUSH register windows to stack
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 000000000000..6535259e16ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 000000000000..4e3ddf839985
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,346 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SparcGenInstrInfo.inc"
+
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
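+// The integer condition codes occupy the low end of SPCC::CondCodes, with
+// ICC_VC as the largest; the FP conditions are numbered above it.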
+static bool IsIntegerCC(unsigned CC)
+{
+ return (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+ switch(CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return SPCC::ICC_E;
+ case SPCC::ICC_E: return SPCC::ICC_NE;
+ case SPCC::ICC_G: return SPCC::ICC_LE;
+ case SPCC::ICC_LE: return SPCC::ICC_G;
+ case SPCC::ICC_GE: return SPCC::ICC_L;
+ case SPCC::ICC_L: return SPCC::ICC_GE;
+ case SPCC::ICC_GU: return SPCC::ICC_LEU;
+ case SPCC::ICC_LEU: return SPCC::ICC_GU;
+ case SPCC::ICC_CC: return SPCC::ICC_CS;
+ case SPCC::ICC_CS: return SPCC::ICC_CC;
+ case SPCC::ICC_POS: return SPCC::ICC_NEG;
+ case SPCC::ICC_NEG: return SPCC::ICC_POS;
+ case SPCC::ICC_VC: return SPCC::ICC_VS;
+ case SPCC::ICC_VS: return SPCC::ICC_VC;
+
+ case SPCC::FCC_U: return SPCC::FCC_O;
+ case SPCC::FCC_O: return SPCC::FCC_U;
+ case SPCC::FCC_G: return SPCC::FCC_LE;
+ case SPCC::FCC_LE: return SPCC::FCC_G;
+ case SPCC::FCC_UG: return SPCC::FCC_ULE;
+ case SPCC::FCC_ULE: return SPCC::FCC_UG;
+ case SPCC::FCC_L: return SPCC::FCC_GE;
+ case SPCC::FCC_GE: return SPCC::FCC_L;
+ case SPCC::FCC_UL: return SPCC::FCC_UGE;
+ case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_LG: return SPCC::FCC_UE;
+ case SPCC::FCC_UE: return SPCC::FCC_LG;
+ case SPCC::FCC_NE: return SPCC::FCC_E;
+ case SPCC::FCC_E: return SPCC::FCC_NE;
+ }
+}
+
+
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+    // When we see a non-terminator, we are done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+    // Terminator is not a branch.
+ if (!I->getDesc().isBranch())
+ return true;
+
+    // Handle unconditional branches.
+ if (I->getOpcode() == SP::BA) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+      return true; // Unknown opcode.
+
+ SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+
+        // Transform the code
+ //
+ // brCC L1
+ // ba L2
+ // L1:
+ // ..
+ // L2:
+ //
+ // into
+ //
+ // brnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+    // FIXME: Handle subsequent conditional branches.
+    // For now, we can't handle multiple conditional branches.
+ return true;
+ }
+ return false;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Sparc branch conditions should have one component!");
+
+ if (Cond.empty()) {
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ return 1;
+ }
+
+  // Conditional branch.
+ unsigned CC = Cond[0].getImm();
+
+ if (IsIntegerCC(CC))
+ BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+ else
+ BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (I->getOpcode() != SP::BA
+ && I->getOpcode() != SP::BCOND
+ && I->getOpcode() != SP::FBCOND)
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
+
+unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
+{
+ SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
+ unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+
+ DebugLoc dl;
+
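+  // GETPCX is a pseudo that is expanded later into the code sequence that
+  // materializes the PIC base address into GlobalBaseReg.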
+ BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);
+ SparcFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 000000000000..eda64efb7a03
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,100 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public SparcGenInstrInfo {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const ;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 000000000000..cf5c48fd18d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,825 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
+
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
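LO10 keeps the low 10 bits of an immediate, HI22 the remaining upper bits, and SETHIimm only matches values whose low 10 bits are zero. A standalone check of that bit arithmetic, independent of the SelectionDAG types used above:

    #include <cassert>
    #include <cstdint>

    uint32_t lo10(uint32_t V) { return V & 1023; }   // mirrors the LO10 transform
    uint32_t hi22(uint32_t V) { return V >> 10; }    // mirrors the HI22 transform

    // SETHIimm-style test: true when the value survives dropping the low 10 bits.
    bool fitsSethi(uint32_t V) { return ((V >> 10) << 10) == V; }

    int main() {
      uint32_t V = 0xdeadbeef;
      // sethi places hi22 into bits 31..10; or-ing in lo10 rebuilds the constant.
      assert(((hi22(V) << 10) | lo10(V)) == V);
      assert(fitsSethi(0xfffffc00));   // low 10 bits clear: a single SETHI suffices
      assert(!fitsSethi(V));           // otherwise an OR with %lo is also needed
      return 0;
    }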
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
+ [SDNPHasChain]>;
+
+def getPCX : Operand<i32> {
+ let PrintMethod = "printGetPCX";
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC  : ICC_VAL<13>;  // Carry Clear/Greater or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<24>; // Unordered or Equal
+def FCC_GE : FCC_VAL<25>; // Greater or Equal
+def FCC_UGE : FCC_VAL<26>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<27>; // Less or Equal
+def FCC_ULE : FCC_VAL<28>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<29>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 instruction pair in one
+/// shot, with no selection pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+// GETPCX for PIC
+let Defs = [O7] in {
+ def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let hasSideEffects = 1, mayStore = 1 in {
+ let rd = 0, rs1 = 0, rs2 = 0 in
+ def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+ "flushw",
+ [(flushw)]>, Requires<[HasV9]>;
+ let rd = 0, rs1 = 1, simm13 = 3 in
+ def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+ "ta 3",
+ [(flushw)]>;
+}
+
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
+// instruction selection into a branch sequence.  This has to handle all
+// permutations of selection between i32/f32/f64 on ICC and FCC.  The pseudos
+// below are expanded by the target's custom inserter after selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
+ let rd = O7.Num, rs1 = G0.Num in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+ let rd = I7.Num, rs1 = G0.Num in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+let Defs = [ICC] in
+ defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+
+let Uses = [ICC] in
+ defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+let Uses = [ICC] in
+ defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+
+let Defs = [ICC] in
+ defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+let Uses = [ICC], Defs = [ICC] in
+ def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+ defm UMUL : F3_12np<"umul", 0b001010>;
+ defm SMUL : F3_12 <"smul", 0b001011, mul>;
+}
+
+// Section B.19 - Divide Instructions, p. 115
+let Defs = [Y] in {
+ defm UDIV : F3_12np<"udiv", 0b001110>;
+ defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [ICC] in
+ def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+let Uses = [FCC] in
+ def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O6],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ ICC, FCC, Y] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
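CALL is the only Format 1 instruction: op = 1 in the top two bits and a 30-bit word displacement below it, so the target is PC + 4*disp. A small sketch of that packing, assuming this reading of the format (the encoder helper is hypothetical, not the backend's actual emitter):

    #include <cassert>
    #include <cstdint>

    // Pack a SPARC Format 1 CALL: op (bits 31-30) = 1, disp30 in bits 29-0.
    uint32_t encodeCall(uint32_t PC, uint32_t Target) {
      int32_t Disp = (int32_t)(Target - PC) >> 2;      // word displacement
      return (1u << 30) | ((uint32_t)Disp & 0x3fffffffu);
    }

    int main() {
      uint32_t Insn = encodeCall(0x1000, 0x2000);      // forward call by 0x1000 bytes
      assert((Insn >> 30) == 1);                       // op field
      assert((Insn & 0x3fffffffu) == 0x400);           // 0x1000 / 4 words
      return 0;
    }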
+
+// Section B.28 - Read State Register Instructions
+let Uses = [Y] in
+ def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+let Defs = [Y] in {
+ def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+ def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+}
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of an FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+let Defs = [FCC] in {
+ def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ let Uses = [ICC] in {
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [ICC] in {
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register.  As such, we have to clear
+// the top 32 bits before using it.  To do this clearing, we use an SLLri X, 0.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+ (POPCrr (SLLri IntRegs:$src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
new file mode 100644
index 000000000000..0b74308eb0ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -0,0 +1,47 @@
+//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sparc specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SPARCMACHINEFUNCTIONINFO_H
+#define SPARCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ private:
+ unsigned GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
+ /// SRetReturnReg - Holds the virtual register into which the sret
+ /// argument is passed.
+ unsigned SRetReturnReg;
+ public:
+ SparcMachineFunctionInfo()
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ };
+}
+
+#endif
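Elsewhere in the backend this object is reached through MachineFunction::getInfo<>. A hedged usage sketch; the helper functions are invented for illustration, only the accessors come from the header above:

    #include "SparcMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    // Record where the varargs save area starts while lowering formal arguments
    // (hypothetical helper; the real code of this kind lives in SparcISelLowering).
    static void rememberVarArgsStart(MachineFunction &MF, int FrameOffset) {
      SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
      FuncInfo->setVarArgsFrameOffset(FrameOffset);
    }

    // Later, e.g. when lowering VASTART, the same offset is read back.
    static int varArgsStart(MachineFunction &MF) {
      return MF.getInfo<SparcMachineFunctionInfo>()->getVarArgsFrameOffset();
    }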
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 000000000000..0acdd2c55d6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,140 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ Reserved.set(SP::G1);
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
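A reserved register is simply a bit set in the mask returned above; the allocator never hands those registers out. A minimal model of that mask in plain C++ (the register numbers follow the Ri<> definitions in SparcRegisterInfo.td, not the TableGen-generated SP:: enum values):

    #include <bitset>
    #include <cassert>

    enum { G0 = 0, G1 = 1, O6 = 14, L0 = 16, I6 = 30, NumIntRegs = 32 };

    std::bitset<NumIntRegs> reservedIntRegs() {
      std::bitset<NumIntRegs> R;
      R.set(G0);   // constant zero
      R.set(G1);   // scratch for large frame offsets (see the FIXME above)
      R.set(O6);   // stack pointer
      R.set(I6);   // frame pointer
      return R;
    }

    int main() {
      std::bitset<NumIntRegs> R = reservedIntRegs();
      assert(R.test(O6) && R.test(I6) && !R.test(L0));   // L0 stays allocatable
      return 0;
    }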
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void
+SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+  // Addressable stack objects are accessed using negative offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+    // Insert G1+%lo(offset) into the user instruction.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
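The large-offset path above splits the frame offset into a %hi part built into G1 with SETHI and a %lo part folded into the user instruction. A standalone check of that arithmetic, including the simm13 range test that picks the path:

    #include <cassert>
    #include <cstdint>

    // simm13 test used to decide whether the offset fits the immediate field.
    bool fitsSimm13(int32_t Off) { return Off >= -4096 && Off <= 4095; }

    // Split used on the large-offset path: SETHI gets the upper bits (via G1),
    // the user instruction keeps the low 10 bits.
    void split(int32_t Off, uint32_t &Hi, uint32_t &Lo) {
      Hi = (uint32_t)Off >> 10;              // matches "(unsigned)Offset >> 10U"
      Lo = (uint32_t)Off & ((1u << 10) - 1);
    }

    int main() {
      assert(fitsSimm13(-4096) && fitsSimm13(4095) && !fitsSimm13(8192));

      int32_t Off = 20000;                   // too large for simm13
      uint32_t Hi, Lo;
      split(Off, Hi, Lo);
      // sethi materializes Hi<<10; adding Lo in the user rebuilds the offset.
      assert((int32_t)((Hi << 10) | Lo) == Off);
      return 0;
    }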
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+unsigned SparcRegisterInfo::getRARegister() const {
+ return SP::I7;
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return SP::I6;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 000000000000..ec9e63a686bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,62 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 000000000000..cf928293c169
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,159 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+class SparcCtrlReg<string n>: Register<n> {
+ let Namespace = "SP";
+}
+
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+}
+
+// Control Registers
+def ICC : SparcCtrlReg<"ICC">;
+def FCC : SparcCtrlReg<"FCC">;
+
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[72]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[73]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[74]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[75]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[76]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[77]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[78]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[79]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[80]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[81]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[82]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[83]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[84]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32,
+ (add L0, L1, L2, L3, L4, L5, L6,
+ L7, I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
+
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..190c575a52de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SparcSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparc-selectiondag-info"
+#include "SparcTargetMachine.h"
+using namespace llvm;
+
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
new file mode 100644
index 000000000000..dcd42037253d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCSELECTIONDAGINFO_H
+#define SPARCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SparcTargetMachine;
+
+class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ ~SparcSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 000000000000..de647e8221a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,44 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit) :
+ SparcGenSubtargetInfo(TT, CPU, FS),
+ IsV9(false),
+ V8DeprecatedInsts(false),
+ IsVIS(false),
+ Is64Bit(is64Bit) {
+
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ if (is64Bit)
+ CPUName = "v9";
+ else
+ CPUName = "v8";
+ }
+ IsV9 = CPUName == "v9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 000000000000..00a04c3bea57
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,58 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class SparcSubtarget : public SparcGenSubtargetInfo {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+ bool Is64Bit;
+
+public:
+ SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64bit);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options.  The definition is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool is64Bit() const { return Is64Bit; }
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit()) {
+ p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64";
+ } else {
+ p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32";
+ }
+ return std::string(p);
+ }
+};
+
+} // end namespace llvm
+
+#endif
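Field by field, the layout strings returned by getDataLayout above say: E = big-endian, p:size:abi:pref describes pointers, iN/fN:abi:pref give per-type alignments, and nNN lists the native integer widths. An annotated restatement of the 32-bit string, assuming that reading of the fields:

    #include <cstdio>

    // Annotated copy of the 32-bit layout string (assumed reading of the fields).
    static const char SparcV8Layout[] =
        "E"            // big-endian
        "-p:32:32:32"  // pointers are 32 bits, ABI and preferred alignment 32
        "-i64:64:64"   // i64 has 64-bit ABI and preferred alignment
        "-f64:64:64"   // f64 is 64-bit aligned
        "-f128:64:64"  // f128 (long double) is only 64-bit aligned on V8
        "-n32";        // native integer width is 32 bits

    int main() { std::puts(SparcV8Layout); return 0; }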
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 000000000000..cbe6d8754efd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,65 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSparcTarget() {
+ // Register the target.
+ RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
+ RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 (or LP64, when is64bit is set)
+/// architecture model.
+SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64bit),
+ DataLayout(Subtarget.getDataLayout()),
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
+}
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This hook lets targets run passes immediately before
+/// machine code is emitted.  It should return true if -print-machineinstrs
+/// should print the code after these passes run.
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : SparcTargetMachine(T, TT, CPU, FS, false) {
+}
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : SparcTargetMachine(T, TT, CPU, FS, true) {
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 000000000000..799fc497f4ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,79 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "SparcInstrInfo.h"
+#include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
+#include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ SparcSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcInstrInfo InstrInfo;
+ SparcFrameLowering FrameLowering;
+public:
+ SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64bit);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const SparcTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+/// SparcV8TargetMachine - Sparc 32-bit target machine
+///
+class SparcV8TargetMachine : public SparcTargetMachine {
+public:
+ SparcV8TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+/// SparcV9TargetMachine - Sparc 64-bit target machine
+///
+class SparcV9TargetMachine : public SparcTargetMachine {
+public:
+ SparcV9TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
new file mode 100644
index 000000000000..5c06f0727e96
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSparcTarget;
+Target llvm::TheSparcV9Target;
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..2ac90164721f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMSystemZDesc
+ SystemZMCTargetDesc.cpp
+ SystemZMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..08f1a9d51fb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 000000000000..8540546b62d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SystemZMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ PointerSize = 8;
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+}
+
+const MCSection *SystemZMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 000000000000..a6a27e2f4b6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,30 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SystemZMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct SystemZMCAsmInfo : public MCAsmInfo {
+ explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 000000000000..5a826a6ef887
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+ RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 000000000000..e2ad5afd6e57
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSystemZTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 000000000000..88960b9cc601
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,52 @@
+//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_H
+#define LLVM_TARGET_SystemZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SystemZTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ namespace SystemZCC {
+    // SystemZ-specific condition codes. These correspond to SYSTEMZ_*_COND in
+    // SystemZInstrInfo.td and must be kept in sync with it.
+ enum CondCodes {
+ O = 0,
+ H = 1,
+ NLE = 2,
+ L = 3,
+ NHE = 4,
+ LH = 5,
+ NE = 6,
+ E = 7,
+ NLH = 8,
+ HE = 9,
+ NL = 10,
+ LE = 11,
+ NH = 12,
+ NO = 13,
+ INVALID = -1
+ };
+ }
+
+ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm;
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 000000000000..4c08c087225e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,61 @@
+//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the SystemZ target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
+ "Support Z10 instructions">;
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z9", []>;
+def : Proc<"z10", [FeatureZ10]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 000000000000..fd4d8b70c75e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,223 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SystemZ assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SystemZAsmPrinter : public AsmPrinter {
+ public:
+ SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+ void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char* Modifier = 0);
+ void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint16_t)MI->getOperand(OpNum).getImm();
+ }
+ void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (int32_t)MI->getOperand(OpNum).getImm();
+ }
+ void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint32_t)MI->getOperand(OpNum).getImm();
+ }
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitInstruction(const MachineInstr *MI);
+ };
+} // end of anonymous namespace
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+}
+
+void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ O << *Mang->getSymbol(GV);
+
+ // Assemble calls via PLT for externally visible symbols if PIC.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ std::string Name(MAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ O << Name;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "@PLT";
+
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
+
+void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
+ if (strncmp(Modifier + 7, "even", 4) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
+ else if (strncmp(Modifier + 7, "odd", 3) == 0)
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
+ else
+ assert(0 && "Invalid subreg modifier");
+ }
+
+ O << '%' << getRegisterName(Reg);
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol: {
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+
+ switch (MO.getTargetFlags()) {
+ default: assert(0 && "Unknown target flag on GV operand");
+ case SystemZII::MO_NO_FLAG:
+ break;
+ case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
+ case SystemZII::MO_PLT: O << "@PLT"; break;
+ }
+
+ printOffset(MO.getOffset(), O);
+}
+
+void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+
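+  // Operands are emitted in the usual D(B) assembler form, e.g. "8(%r15)" for
+  // displacement 8 with base register %r15; the parenthesised base is omitted
+  // when no base register is present.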
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ O << ')';
+ }
+}
+
+void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O,
+ const char *Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Index = MI->getOperand(OpNum+2);
+
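+  // Operands are emitted in the D(X,B) assembler form, e.g. "0(%r1,%r15)";
+  // the index register is only printed when one is present.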
+ // Print displacement operand.
+ printOperand(MI, OpNum+1, O);
+
+ // Print base operand (if any)
+ if (Base.getReg()) {
+ O << '(';
+ printOperand(MI, OpNum, O);
+ if (Index.getReg()) {
+ O << ',';
+ printOperand(MI, OpNum+2, O);
+ }
+ O << ')';
+ } else
+ assert(!Index.getReg() && "Should allocate base register first!");
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 000000000000..c799a9e501aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,46 @@
+//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // i64 is returned in register R2
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+ // f32 / f64 are returned in F0
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
+]>;
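+
+// For example, an i32 return value is promoted to i64 and comes back in r2,
+// while a double result is returned in f0.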
+
+//===----------------------------------------------------------------------===//
+// SystemZ Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // The first 5 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 floating point arguments of non-varargs functions are passed
+ // in FP registers.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
+
+  // Any remaining i64, f32 or f64 arguments are passed on the stack in
+  // slots that are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 000000000000..2ad84a2d052e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,386 @@
+//===-- SystemZFrameLowering.cpp - SystemZ Frame Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
+ // Fill the spill offsets map
+ static const unsigned SpillOffsTab[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 }
+ };
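+  // Each entry maps a callee-saved GPR to its 8-byte slot in the 160-byte
+  // register save area addressed off R15; for example, R6D is spilled at
+  // offset 0x30.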
+
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+ for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+ RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable-sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, const TargetInstrInfo &TII) {
+ unsigned Opc; uint64_t Chunk;
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+ if (Offset >= (1LL << 15) - 1) {
+ Opc = SystemZ::ADD64ri32;
+ Chunk = (1LL << 31) - 1;
+ } else {
+ Opc = SystemZ::ADD64ri16;
+ Chunk = (1LL << 15) - 1;
+ }
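+  // For example, an adjustment of -200000 bytes exceeds the 16-bit immediate
+  // of ADD64ri16, so a single ADD64ri32 of -200000 is emitted; only amounts
+  // beyond the selected chunk limit are split into multiple adds.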
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+  // Note that the area for callee-saved registers is already allocated, so we
+  // need to 'undo' that part of the stack movement.
+ uint64_t StackSize = MFI->getStackSize();
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
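+  // getOffsetOfLocalArea() is -160 here (see the constructor), so this adds
+  // the 160-byte register save area to the amount being allocated.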
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::MOV64mr ||
+ MBBI->getOpcode() == SystemZ::MOV64mrm))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // adjust stack pointer: R15 -= numbytes
+ if (StackSize || MFI->hasCalls()) {
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+ }
+
+ if (hasFP(MF)) {
+ // Update R11 with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+
+ }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case SystemZ::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+  // Note that the area for callee-saved registers is already allocated, so we
+  // need to 'undo' that part of the stack movement.
+ uint64_t StackSize =
+ MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the final terminator instruction.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ --MBBI;
+ if (!PI->getDesc().isTerminator())
+ break;
+ }
+
+  // When the callee-saved register restores were emitted, the stack frame was
+  // not yet finalized (and thus the stack size was unknown). Adjust the
+  // restore offset now that the full stack size is known.
+ if (StackSize || MFI->hasCalls()) {
+ assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+ MBBI->getOpcode() == SystemZ::MOV64rm) &&
+ "Expected to see callee-save register restore code");
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+
+ unsigned i = 0;
+ MachineInstr &MI = *MBBI;
+ while (!MI.getOperand(i).isImm()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Unexpected restore code!");
+ }
+
+ uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+    // If Offset does not fit into the 20-bit signed displacement field, we
+    // need to emit some additional code...
+ if (Offset > 524287) {
+      // Fold the displacement into the load instruction as much as possible.
+ NumBytes = Offset - 524287;
+ Offset = 524287;
+ emitSPUpdate(MBB, MBBI, NumBytes, TII);
+ }
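+    // E.g. a combined offset of 600000 results in an extra SP adjustment of
+    // 75713 bytes, leaving 524287 as the displacement of the restore.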
+
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Fixed objects are really located in the "previous" frame.
+ if (FI < 0)
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ Offset += StackSize - getOffsetOfLocalArea();
+
+ // Skip the register save area if we generated the stack frame.
+ if (StackSize || MFI->hasCalls())
+ Offset -= getOffsetOfLocalArea();
+
+ return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned CalleeFrameSize = 0;
+
+ // Scan the callee-saved and find the bounds of register spill area.
+ unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ CalleeFrameSize += 8;
+ if (StartOffset > Offset) {
+ LowReg = Reg; StartOffset = Offset;
+ }
+ if (EndOffset < Offset) {
+ HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+ }
+ }
+ }
+
+ // Save information for epilogue inserter.
+ MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+ MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+ // Save GPRs
+ if (StartOffset) {
+    // Build a store instruction. Use a STORE MULTIPLE instruction if there is
+    // more than one register to store, otherwise just a plain STORE.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+ // Add store operands.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+ MIB.addReg(LowReg, RegState::Kill);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Kill);
+
+ // Do a second scan adding regs as being killed by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitKill);
+ }
+ }
+
+ // Save FPRs
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Restore FP registers
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg))
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+
+ // Restore GP registers
+ unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+ unsigned StartOffset = RegSpillOffsets[LowReg];
+
+ if (StartOffset) {
+    // Build a load instruction. Use a LOAD MULTIPLE instruction if there is
+    // more than one register to load, otherwise just a plain LOAD.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+ MIB.addReg(LowReg, RegState::Define);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Define);
+
+ MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+  // Determine whether R14/R15 will ever be clobbered inside the function and,
+  // if so, mark them as callee-saved.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether any of the high FPRs are ever used; if so, we need to save
+  // R15 as well.
+ static const unsigned HighFPRs[] = {
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ };
+
+ bool HighFPRsUsed = false;
+ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+ HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+ if (FFI->hasCalls())
+ /* FIXME: function is varargs */
+ /* FIXME: function grabs RA */
+ /* FIXME: function calls eh_return */
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ if (HighFPRsUsed ||
+ FFI->hasCalls() ||
+ FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+ FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+ /* FIXME: function is varargs */)
+ MRI.setPhysRegUsed(SystemZ::R15D);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 000000000000..1284b6802b3a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//=- SystemZFrameLowering.h - Define frame lowering for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+ class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+protected:
+ const SystemZSubtarget &STI;
+
+public:
+ explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
+ bool hasFP(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2186ff1fed54
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,779 @@
+//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValues
+  /// instead of register numbers for the leaves of the matched tree.
+ struct SystemZRRIAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ SDValue IndexReg;
+ int64_t Disp;
+ bool isRI;
+
+ SystemZRRIAddressMode(bool RI = false)
+ : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
+ }
+
+ void dump() {
+ errs() << "SystemZRRIAddressMode " << this << '\n';
+ if (BaseType == RegBase) {
+ errs() << "Base.Reg ";
+ if (Base.Reg.getNode() != 0)
+ Base.Reg.getNode()->dump();
+ else
+ errs() << "nul";
+ errs() << '\n';
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ if (!isRI) {
+ errs() << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else errs() << "nul";
+ }
+ errs() << " Disp " << Disp << '\n';
+ }
+ };
+}
+
+/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class SystemZDAGToDAGISel : public SelectionDAGISel {
+ const SystemZTargetLowering &Lowering;
+ const SystemZSubtarget &Subtarget;
+
+ void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp);
+ void getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index);
+
+ public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual const char *getPassName() const {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+
+ private:
+ bool SelectAddrRI12Only(SDValue& Addr,
+ SDValue &Base, SDValue &Disp);
+ bool SelectAddrRI12(SDValue& Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly = false);
+ bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+ bool SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ SDNode *Select(SDNode *Node);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
+ };
+} // end anonymous namespace
+
+/// createSystemZISelDag - This pass converts a legalized DAG into a
+/// SystemZ-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+/// isImmSExt20 - This method tests whether the given value, a 32-bit or
+/// 64-bit immediate, can be accurately represented as a sign extension of a
+/// 20-bit value. If so, it returns true and provides the immediate through
+/// Imm.
+static bool isImmSExt20(int64_t Val, int64_t &Imm) {
+ if (Val >= -524288 && Val <= 524287) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
+
+/// isImmZExt12 - This method tests whether the given value, a 32-bit or
+/// 64-bit immediate, can be accurately represented as a zero extension of a
+/// 12-bit value. If so, it returns true and provides the immediate through
+/// Imm.
+static bool isImmZExt12(int64_t Val, int64_t &Imm) {
+ if (Val >= 0 && Val <= 0xFFF) {
+ Imm = Val;
+ return true;
+ }
+ return false;
+}
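+
+// The two forms mirror the two SystemZ displacement encodings: the classic
+// instruction formats take an unsigned 12-bit displacement, while the
+// long-displacement ("Y"-form) instructions take a signed 20-bit one.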
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+ bool is12Bit, unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // FIXME: We can perform better here. If we have something like
+ // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of
+ // imm into addressing mode.
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ int64_t Imm = 0;
+ bool Match = (is12Bit ?
+ isImmZExt12(AM.Disp + Val, Imm) :
+ isImmSExt20(AM.Disp + Val, Imm));
+ if (Match) {
+ AM.Disp = Imm;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SUB: {
+    // Given A-B, if A can be completely folded into the address while leaving
+    // the index field unused, use -B as the index. This is a win if A has
+    // multiple parts that can be folded into the address. It also saves a mov
+    // if the base register has other uses, since it avoids a two-address sub
+    // instruction; however, it costs an additional mov if the index register
+    // has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ SystemZRRIAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRI) {
+ AM = Backup;
+ break;
+ }
+
+ // If the base is a register with multiple uses, this transformation may
+ // save a mov. Otherwise it's probably better not to do it.
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue RHS = N.getNode()->getOperand(1);
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ SystemZRRIAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (!AM.isRI &&
+ AM.BaseType == SystemZRRIAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
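+    // For example, (or (shl X, 3), 6): the shift guarantees that the low three
+    // bits are zero, so the 6 can simply be folded into the displacement.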
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ SystemZRRIAddressMode Backup = AM;
+ int64_t Offset = CN->getSExtValue();
+ int64_t Imm = 0;
+ bool MatchOffset = (is12Bit ?
+ isImmZExt12(AM.Disp + Offset, Imm) :
+ isImmSExt20(AM.Disp + Offset, Imm));
+ // The resultant disp must fit in 12 or 20-bits.
+ if (MatchOffset &&
+ // LHS should be an addr mode.
+ !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp = Imm;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
+ SystemZRRIAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
+ AM.IndexReg = N;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = SystemZRRIAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp) {
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ Base = AM.Base.Reg;
+ else
+ Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
+ SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ getAddressOperandsRI(AM, Base, Disp);
+ Index = AM.IndexReg;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// an unsigned 12-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
+ SDValue &Base, SDValue &Disp) {
+ return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
+}
+
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
+ SDValue &Base, SDValue &Disp,
+ bool is12BitOnly) {
+ SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether we can also match the address using a 20-bit displacement.
+ if (!Done && !is12BitOnly &&
+ !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM12, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// a signed 20-bit displacement [r+imm].
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
+ SDValue &Base, SDValue &Disp) {
+ SystemZRRIAddressMode AM(/*isRI*/true);
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+ getAddressOperandsRI(AM, Base, Disp);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus an unsigned 12-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM20, AM12;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM12);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+ return false;
+
+  // Check whether we can also match the address using a 20-bit displacement.
+ if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
+ if (AM12.Disp == 0 && AM20.Disp != 0)
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM12.Base.Reg.getNode())
+ AM12.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM12.IndexReg.getNode())
+ AM12.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM12, Base, Disp, Index);
+
+ return true;
+}
+
+/// Returns true if the address can be represented by a base register plus
+/// index register plus a signed 20-bit displacement [base + idx + imm].
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+ bool Done = false;
+
+ if (!Addr.hasOneUse()) {
+ unsigned Opcode = Addr.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold the address into the addressing mode, allow it
+      // even when it has multiple uses: in general, an address computation is
+      // used as an address by all of its uses. But watch out for CopyToReg
+      // uses; they mean the address is live-out and will be materialized by an
+      // LA anyway, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(Addr, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+ return false;
+
+ DEBUG(errs() << "MatchAddress (final): "; AM.dump());
+
+ EVT VT = Addr.getValueType();
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Disp, Index);
+
+ return true;
+}
+
+/// SelectLAAddr - Match the address and determine whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as LA/LAY.
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ SystemZRRIAddressMode AM;
+
+ if (MatchAddress(Addr, AM, false))
+ return false;
+
+ EVT VT = Addr.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity += 1;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity += 1;
+
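+  // Only emit an LA/LAY when the matched mode combines enough components to
+  // pay off, e.g. a frame index or a base plus index plus displacement.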
+ if (Complexity > 2) {
+ getAddressOperands(AM, Base, Disp, Index);
+ return true;
+ }
+
+ return false;
+}
+
+bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Disp, SDValue &Index) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ IsLegalToFold(N, P, P, OptLevel))
+ return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
+ return false;
+}
+
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SDIVREM: {
+ unsigned Opc, MOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ EVT ResVT;
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+ ResVT = MVT::v2i64;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend;
+ if (is32Bit)
+ Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
+ else
+ Dividend = N0.getNode();
+
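+    // SystemZ divide instructions operate on an even/odd register pair: the
+    // dividend is placed in the odd half, and after the divide the odd half
+    // holds the quotient while the even half holds the remainder.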
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
+
+ SDNode *Result;
+ SDValue DivVal = SDValue(Dividend, 0);
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ case ISD::UDIVREM: {
+ unsigned Opc, MOpc, ClrOpc;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ EVT ResVT;
+
+ bool is32Bit = false;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+ ClrOpc = SystemZ::MOV64Pr0_even;
+ ResVT = MVT::v2i32;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+ ClrOpc = SystemZ::MOV128r0_even;
+ ResVT = MVT::v2i64;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
+
+ // Prepare the dividend
+ SDNode *Dividend = N0.getNode();
+
+ // Insert prepared dividend into suitable 'subreg'
+ SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, ResVT);
+ {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ Dividend =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
+ SDValue(Tmp, 0), SDValue(Dividend, 0),
+ CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
+ }
+
+ // Zero out even subreg
+ Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
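+ // (The unsigned divide treats the register pair as one wide dividend, so its
+ // high (even) half must be zero before the operation.)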
+
+ SDValue DivVal = SDValue(Dividend, 0);
+ SDNode *Result;
+ if (foldedLoad) {
+ SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
+ Ops, array_lengthof(Ops));
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
+ } else {
+ Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
+ }
+
+ // Copy the division (odd subreg) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
+ SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ // Copy the remainder (even subreg) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
+ SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, NVT,
+ SDValue(Result, 0),
+ CurDAG->getTargetConstant(SubRegIdx,
+ MVT::i32));
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
+ }
+
+ return NULL;
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << "\n";
+ );
+ return ResNode;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 000000000000..871c2972a8c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,867 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZ.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
+ TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ RegInfo = TM.getRegisterInfo();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
+ addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
+ addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
+ addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
+
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
+ addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
+ }
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+ // TODO: It may be better to default to latency-oriented scheduling; however,
+ // LLVM's current latency-oriented scheduler can't handle physreg definitions
+ // such as SystemZ's PSW, so use the register-pressure scheduler, which can.
+ setSchedulingPreference(Sched::RegPressure);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
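+ // (SDIV/UDIV/SREM/UREM are expanded so that combined ISD::SDIVREM and
+ // ISD::UDIVREM nodes are formed instead; the instruction selector matches
+ // those to the paired divide/remainder instructions.)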
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // FIXME: Can we lower these 2 efficiently?
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ // Lower some FP stuff
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+
+ // We have only 64-bit bitconverts
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ default:
+ llvm_unreachable("Should not custom lower this!");
+ return SDValue();
+ }
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
+ return false;
+
+ // +0.0f lzer
+ // +0.0  lzdr
+ // -0.0f lzer + lner
+ // -0.0  lzdr + lndr
+ return Imm.isZero() || Imm.isNegZero();
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ if (VT == MVT::i32)
+ return std::make_pair(0U, SystemZ::GR32RegisterClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, SystemZ::GR128RegisterClass);
+
+ return std::make_pair(0U, SystemZ::GR64RegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
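+// The generated include above provides the CC_SystemZ and RetCC_SystemZ
+// calling-convention functions referenced below.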
+
+SDValue
+SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ }
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // SystemZ target does not yet support tail call optimization.
+ isTailCall = false;
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - Transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ if (isVarArg)
+ report_fatal_error("Varargs not supported yet");
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[i];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << LocVT.getSimpleVT().SimpleTy
+ << "\n";
+#endif
+ llvm_unreachable(0);
+ case MVT::i64:
+ RC = SystemZ::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ RC = SystemZ::FP32RegisterClass;
+ break;
+ case MVT::f64:
+ RC = SystemZ::FP64RegisterClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+
+ return Chain;
+}
+
+/// LowerCCCCallTo - Function arguments are copied from virtual registers to
+/// physical registers or the stack frame; CALLSEQ_START and CALLSEQ_END are
+/// emitted.
+/// TODO: sret.
+SDValue
+SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+
+ // Offset to first argument stack slot.
+ const unsigned FirstArgOffset = 160;
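+ // (In the zSeries ELF ABI the caller reserves a 160-byte register save area
+ // at the stack pointer, so outgoing stack arguments begin just past it.)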
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register must be kept in the RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr =
+ DAG.getCopyFromReg(Chain, dl,
+ (TFI->hasFP(MF) ?
+ SystemZ::R11D : SystemZ::R15D),
+ getPointerTy());
+
+ unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(Offset));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+ // flag operands which copy the outgoing args into registers. The InFlag is
+ // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ VA.getLocVT(), InFlag).getValue(1);
+ SDValue RetValue = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represents the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ResValue = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // If this is an 8/16/32-bit value, it really should be passed promoted
+ // to 64 bits.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
+ else if (VA.getLocInfo() == CCValAssign::AExt)
+ ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
+
+ // Guarantee that all emitted copies are stuck together by threading them
+ // through the flag value, so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const {
+ // FIXME: Emit a test if RHS is zero
+
+ bool isUnsigned = false;
+ SystemZCC::CondCodes TCC;
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ TCC = SystemZCC::E;
+ break;
+ case ISD::SETUEQ:
+ TCC = SystemZCC::NLH;
+ break;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ TCC = SystemZCC::NE;
+ break;
+ case ISD::SETUNE:
+ TCC = SystemZCC::LH;
+ break;
+ case ISD::SETO:
+ TCC = SystemZCC::O;
+ break;
+ case ISD::SETUO:
+ TCC = SystemZCC::NO;
+ break;
+ case ISD::SETULE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NH;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ TCC = SystemZCC::LE;
+ break;
+ case ISD::SETUGE:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NL;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ TCC = SystemZCC::HE;
+ break;
+ case ISD::SETUGT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NLE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ TCC = SystemZCC::H;
+ break;
+ case ISD::SETULT:
+ if (LHS.getValueType().isFloatingPoint()) {
+ TCC = SystemZCC::NHE;
+ break;
+ }
+ isUnsigned = true; // FALLTHROUGH
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ TCC = SystemZCC::L;
+ break;
+ }
+
+ SystemZCC = DAG.getConstant(TCC, MVT::i32);
+
+ DebugLoc dl = LHS.getDebugLoc();
+ return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
+ dl, MVT::i64, LHS, RHS);
+}
+
+
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+ return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, SystemZCC, Flag);
+}
+
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue SystemZCC;
+ SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(SystemZCC);
+ Ops.push_back(Flag);
+
+ return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired) {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ unsigned char OpFlags = 0;
+ if (ExtraLoadRequired)
+ OpFlags = SystemZII::MO_GOTENT;
+
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
+ getPointerTy(), Result);
+
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+// FIXME: PIC here
+SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+
+// FIXME: PIC here
+// FIXME: This is just a dirty hack. We need to lower the constant pool properly.
+SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset());
+
+ return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
+ case SystemZISD::CALL: return "SystemZISD::CALL";
+ case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
+ case SystemZISD::CMP: return "SystemZISD::CMP";
+ case SystemZISD::UCMP: return "SystemZISD::UCMP";
+ case SystemZISD::SELECT: return "SystemZISD::SELECT";
+ case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
+ default: return NULL;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const SystemZInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == SystemZ::Select32 ||
+ MI->getOpcode() == SystemZ::SelectF32 ||
+ MI->getOpcode() == SystemZ::Select64 ||
+ MI->getOpcode() == SystemZ::SelectF64) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 000000000000..bab3dc23eead
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,145 @@
+//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace SystemZISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// CALL - These operations represent an abstract call
+ /// instruction, which includes a bunch of information.
+ CALL,
+
+ /// PCRelativeWrapper - PC relative address
+ PCRelativeWrapper,
+
+ /// CMP, UCMP - Compare instruction
+ CMP,
+ UCMP,
+
+ /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is
+ /// the block to branch if condition is true, operand 2 is condition code
+ /// and operand 3 is the flag operand produced by a CMP instruction.
+ BRCOND,
+
+ /// SELECT - Operands 0 and 1 are selection variables, operand 2 is
+ /// condition code and operand 3 is the flag operand.
+ SELECT
+ };
+ }
+
+ class SystemZSubtarget;
+ class SystemZTargetMachine;
+
+ class SystemZTargetLowering : public TargetLowering {
+ public:
+ explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+ TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue EmitCmp(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &SystemZCC,
+ SelectionDAG &DAG) const;
+
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ private:
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ const SystemZSubtarget &Subtarget;
+ const SystemZTargetMachine &TM;
+ const SystemZRegisterInfo *RegInfo;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 000000000000..ab45ec5984e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,128 @@
+//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+// The functions below may be used together with BuildMI to add entire
+// memory references in a single, typed function call.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Displacement, Index.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// SystemZAddressMode - This struct holds a generalized full SystemZ address
+/// mode. The base register can be a frame index, which will eventually be
+/// replaced with R15 or R11, with Disp adjusted accordingly.
+struct SystemZAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned IndexReg;
+ int32_t Disp;
+ const GlobalValue *GV;
+
+ SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
+ Base.Reg = 0;
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no index or displacement.
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with 3
+ // values, this adds: Reg, [0, NoReg] to the instruction.
+ return MIB.addReg(Reg).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no index, but with a displacement.
+/// An example is: 10(%r15).
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &
+addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
+ .addReg(Reg2, getKillRegState(isKill2));
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
+ if (AM.BaseType == SystemZAddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+ assert(0);
+
+ return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the FrameIndex as its base register until it is resolved.
+/// A constant offset can be specified as well.
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI), Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
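+
+// A minimal usage sketch (LoadOpc and FrameIdx are placeholders, not names
+// defined in this backend):
+//
+//   addFrameReference(BuildMI(MBB, InsertPos, DL, TII.get(LoadOpc), DestReg),
+//                     FrameIdx);
+//
+// builds an instruction whose memory operands follow the Base, Displacement,
+// Index order described at the top of this file.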
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 000000000000..a65828061d3b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,340 @@
+//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ (binary) floating point instructions in
+// TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: multiclassify!
+
+//===----------------------------------------------------------------------===//
+// FP Pattern fragments
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
+ "# SelectF32 PSEUDO",
+ [(set FP32:$dst,
+ (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
+ def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
+ "# SelectF64 PSEUDO",
+ [(set FP64:$dst,
+ (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// Floating point constant loads.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
+ "lzer\t{$dst}",
+ [(set FP32:$dst, fpimm0)]>;
+def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
+ "lzdr\t{$dst}",
+ [(set FP64:$dst, fpimm0)]>;
+}
+
+let neverHasSideEffects = 1 in {
+def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "ler\t{$dst, $src}",
+ []>;
+def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "ldr\t{$dst, $src}",
+ []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "le\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr12:$src))]>;
+def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
+ "ley\t{$dst, $src}",
+ [(set FP32:$dst, (load rriaddr:$src))]>;
+def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ld\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr12:$src))]>;
+def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
+ "ldy\t{$dst, $src}",
+ [(set FP64:$dst, (load rriaddr:$src))]>;
+}
+
+def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
+ "ste\t{$src, $dst}",
+ [(store FP32:$src, rriaddr12:$dst)]>;
+def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
+ "stey\t{$src, $dst}",
+ [(store FP32:$src, rriaddr:$dst)]>;
+def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
+ "std\t{$src, $dst}",
+ [(store FP64:$src, rriaddr12:$dst)]>;
+def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
+ "stdy\t{$src, $dst}",
+ [(store FP64:$src, rriaddr:$dst)]>;
+
+def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
+def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "cpsdr\t{$dst, $src2, $src1}",
+ [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+
+let Defs = [PSW] in {
+def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lcebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg FP32:$src)),
+ (implicit PSW)]>;
+def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lcdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg FP64:$src)),
+ (implicit PSW)]>;
+
+def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lpebr\t{$dst, $src}",
+ [(set FP32:$dst, (fabs FP32:$src)),
+ (implicit PSW)]>;
+def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lpdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fabs FP64:$src)),
+ (implicit PSW)]>;
+
+def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "lnebr\t{$dst, $src}",
+ [(set FP32:$dst, (fneg(fabs FP32:$src))),
+ (implicit PSW)]>;
+def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "lndbr\t{$dst, $src}",
+ [(set FP64:$dst, (fneg(fabs FP64:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+let Defs = [PSW] in {
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "aebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "adbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+}
+
+def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "aeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "adb\t{$dst, $src2}",
+ [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+
+def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "sebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
+ (implicit PSW)]>;
+def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "sdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
+ (implicit PSW)]>;
+
+def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "seb\t{$dst, $src2}",
+ [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "sdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "meebr\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
+def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "mdbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
+}
+
+def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "meeb\t{$dst, $src2}",
+ [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
+def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "mdb\t{$dst, $src2}",
+ [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
+
+def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "maebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "maeb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "madbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "madb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
+ "msebr\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
+ FP32:$src1))]>;
+def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
+ "mseb\t{$dst, $src3, $src2}",
+ [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP32:$src3),
+ FP32:$src1))]>;
+
+def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
+ "msdbr\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
+ FP64:$src1))]>;
+def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
+ "msdb\t{$dst, $src3, $src2}",
+ [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
+ FP64:$src3),
+ FP64:$src1))]>;
+
+def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
+ "debr\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
+def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
+ "ddbr\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
+
+def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
+ "deb\t{$dst, $src2}",
+ [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
+def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
+ "ddb\t{$dst, $src2}",
+ [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
+
+} // Constraints = "$src1 = $dst"
+
+def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
+ "sqebr\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt FP32:$src))]>;
+def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
+ "sqdbr\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt FP64:$src))]>;
+
+def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
+ "sqeb\t{$dst, $src}",
+ [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
+def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "sqdb\t{$dst, $src}",
+ [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
+
+def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
+ "ledbr\t{$dst, $src}",
+ [(set FP32:$dst, (fround FP64:$src))]>;
+
+def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
+ "ldebr\t{$dst, $src}",
+ [(set FP64:$dst, (fextend FP32:$src))]>;
+def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
+ "ldeb\t{$dst, $src}",
+ [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
+
+let Defs = [PSW] in {
+def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
+ "cefbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
+ "cegbr\t{$dst, $src}",
+ [(set FP32:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
+ "cdfbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR32:$src)),
+ (implicit PSW)]>;
+def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "cdgbr\t{$dst, $src}",
+ [(set FP64:$dst, (sint_to_fp GR64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
+ "cfebr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
+ "cfdbr\t{$dst, 5, $src}",
+ [(set GR32:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+
+def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
+ "cgebr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP32:$src)),
+ (implicit PSW)]>;
+def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "cgdbr\t{$dst, 5, $src}",
+ [(set GR64:$dst, (fp_to_sint FP64:$src)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
+ "lgdr\t{$dst, $src}",
+ [(set GR64:$dst, (bitconvert FP64:$src))]>;
+def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
+ "ldgr\t{$dst, $src}",
+ [(set FP64:$dst, (bitconvert GR64:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions (set PSW but do not produce any other result)
+
+// Floating-point comparisons
+let Defs = [PSW] in {
+def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
+ "cebr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
+def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
+ "cdbr\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
+
+def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
+ "ceb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP32:$src1,
+ (load rriaddr12:$src2)))]>;
+def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
+ "cdb\t$src1, $src2",
+ [(set PSW, (SystemZcmp FP64:$src1,
+ (load rriaddr12:$src2)))]>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Floating point constant -0.0
+def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
+def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 000000000000..b4a8993c1971
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,133 @@
+//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def EForm : Format<1>;
+def IForm : Format<2>;
+def RIForm : Format<3>;
+def RIEForm : Format<4>;
+def RILForm : Format<5>;
+def RISForm : Format<6>;
+def RRForm : Format<7>;
+def RREForm : Format<8>;
+def RRFForm : Format<9>;
+def RRRForm : Format<10>;
+def RRSForm : Format<11>;
+def RSForm : Format<12>;
+def RSIForm : Format<13>;
+def RSILForm : Format<14>;
+def RSYForm : Format<15>;
+def RXForm : Format<16>;
+def RXEForm : Format<17>;
+def RXFForm : Format<18>;
+def RXYForm : Format<19>;
+def SForm : Format<20>;
+def SIForm : Format<21>;
+def SILForm : Format<22>;
+def SIYForm : Format<23>;
+def SSForm : Format<24>;
+def SSEForm : Format<25>;
+def SSFForm : Format<26>;
+
+class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
+ let Namespace = "SystemZ";
+
+ bits<16> Opcode = op;
+
+ Format Form = f;
+ bits<5> FormBits = Form.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+}
+
+class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-7} = op;
+ let Opcode{8-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<0, f, outs, ins> {
+ let Opcode{0-11} = op;
+ let Opcode{12-15} = 0;
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : InstSystemZ<op, f, outs, ins> {
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
+
+class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RRForm, outs, ins, asmstr, pattern>;
+
+class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RIForm, outs, ins, asmstr, pattern>;
+
+class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I12<op, RILForm, outs, ins, asmstr, pattern>;
+
+class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RREForm, outs, ins, asmstr, pattern>;
+
+class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RXForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RXYForm, outs, ins, asmstr, pattern>;
+
+class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, RSForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, RSYForm, outs, ins, asmstr, pattern>;
+
+class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I8<op, SIForm, outs, ins, asmstr, pattern> {
+ let AddedComplexity = 1;
+}
+
+class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SIYForm, outs, ins, asmstr, pattern>;
+
+class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : I16<op, SILForm, outs, ins, asmstr, pattern>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<0, Pseudo, outs, ins> {
+
+ let Pattern = pattern;
+ let AsmString = asmstr;
+}
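An illustrative sketch, not part of the patch: the I8/I12/I16 classes above place their opcode in the low bits of the 16-bit Opcode field and zero the rest (TableGen bit 0 is the least significant bit). In C++ terms, assuming a hypothetical encoder helper that mirrors those let statements:

    #include <cstdint>

    // Mirrors I8:  Opcode{0-7}  = op, Opcode{8-15} = 0
    static inline uint16_t packI8(uint8_t Op)   { return Op; }
    // Mirrors I12: Opcode{0-11} = op, Opcode{12-15} = 0
    static inline uint16_t packI12(uint16_t Op) { return Op & 0x0FFF; }
    // Mirrors I16: the full 16-bit opcode is used as-is
    static inline uint16_t packI16(uint16_t Op) { return Op; }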
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 000000000000..99e2730609e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,439 @@
+//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
+ RI(tm, *this), TM(tm) {
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
+ return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32mr;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64mr;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32mr;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64mr;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Pmr;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128mr;
+ } else
+ llvm_unreachable("Unsupported regclass to store");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ unsigned Opc = 0;
+ if (RC == &SystemZ::GR32RegClass ||
+ RC == &SystemZ::ADDR32RegClass)
+ Opc = SystemZ::MOV32rm;
+ else if (RC == &SystemZ::GR64RegClass ||
+ RC == &SystemZ::ADDR64RegClass) {
+ Opc = SystemZ::MOV64rm;
+ } else if (RC == &SystemZ::FP32RegClass) {
+ Opc = SystemZ::FMOV32rm;
+ } else if (RC == &SystemZ::FP64RegClass) {
+ Opc = SystemZ::FMOV64rm;
+ } else if (RC == &SystemZ::GR64PRegClass) {
+ Opc = SystemZ::MOV64Prm;
+ } else if (RC == &SystemZ::GR128RegClass) {
+ Opc = SystemZ::MOV128rm;
+ } else
+ llvm_unreachable("Unsupported regclass to load");
+
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rr;
+ else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV32rr;
+ else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rrP;
+ else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV128rr;
+ else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV32rr;
+ else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV64rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32rm:
+ case SystemZ::MOV32rmy:
+ case SystemZ::MOV64rm:
+ case SystemZ::MOVSX32rm8:
+ case SystemZ::MOVSX32rm16y:
+ case SystemZ::MOVSX64rm8:
+ case SystemZ::MOVSX64rm16:
+ case SystemZ::MOVSX64rm32:
+ case SystemZ::MOVZX32rm8:
+ case SystemZ::MOVZX32rm16:
+ case SystemZ::MOVZX64rm8:
+ case SystemZ::MOVZX64rm16:
+ case SystemZ::MOVZX64rm32:
+ case SystemZ::FMOV32rm:
+ case SystemZ::FMOV32rmy:
+ case SystemZ::FMOV64rm:
+ case SystemZ::FMOV64rmy:
+ case SystemZ::MOV64Prm:
+ case SystemZ::MOV64Prmy:
+ case SystemZ::MOV128rm:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
+ MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SystemZ::MOV32mr:
+ case SystemZ::MOV32mry:
+ case SystemZ::MOV64mr:
+ case SystemZ::MOV32m8r:
+ case SystemZ::MOV32m8ry:
+ case SystemZ::MOV32m16r:
+ case SystemZ::MOV32m16ry:
+ case SystemZ::MOV64m8r:
+ case SystemZ::MOV64m8ry:
+ case SystemZ::MOV64m16r:
+ case SystemZ::MOV64m16ry:
+ case SystemZ::MOV64m32r:
+ case SystemZ::MOV64m32ry:
+ case SystemZ::FMOV32mr:
+ case SystemZ::FMOV32mry:
+ case SystemZ::FMOV64mr:
+ case SystemZ::FMOV64mry:
+ case SystemZ::MOV64Pmr:
+ case SystemZ::MOV64Pmry:
+ case SystemZ::MOV128mr:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
+ MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(3).getReg();
+ }
+ break;
+ }
+ return 0;
+}
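An illustrative sketch, not part of the patch: isLoadFromStackSlot and isStoreToStackSlot above rely on the memory-operand layout used throughout this backend, i.e. three consecutive machine operands (frame index or base, immediate displacement, index register), and treat an access as a plain stack slot reference only when the displacement is 0 and no index register is used. The check in isolation, with a hypothetical helper name:

    #include "llvm/CodeGen/MachineInstr.h"

    // True when operands [Base, Base+2] form a bare frame-index reference
    // (frame index, zero displacement, no index register).
    static bool isBareFrameIndexRef(const llvm::MachineInstr *MI, unsigned Base) {
      return MI->getOperand(Base).isFI() &&
             MI->getOperand(Base + 1).isImm() &&
             MI->getOperand(Base + 1).getImm() == 0 &&
             MI->getOperand(Base + 2).isReg() &&
             MI->getOperand(Base + 2).getReg() == 0;
    }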
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 1 && "Invalid branch condition!");
+
+ SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeCondition(CC));
+ return false;
+}
+
+bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == SystemZ::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+      // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == SystemZCC::INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination.
+    assert(Cond.size() == 1);
+    assert(TBB);
+
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
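An illustrative sketch, not part of the patch, of how the TBB/FBB/Cond triple filled in by AnalyzeBranch above is conventionally interpreted by callers; the helper name is hypothetical:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Target/TargetInstrInfo.h"

    // Summarize how a block ends, based on the AnalyzeBranch() contract.
    static const char *describeBlockEnding(const llvm::TargetInstrInfo &TII,
                                            llvm::MachineBasicBlock &MBB) {
      llvm::MachineBasicBlock *TBB = 0, *FBB = 0;
      llvm::SmallVector<llvm::MachineOperand, 1> Cond;
      if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ false))
        return "unanalyzable terminator (e.g. indirect branch)";
      if (Cond.empty())
        return TBB ? "one unconditional branch" : "falls through";
      return FBB ? "conditional branch to TBB plus unconditional branch to FBB"
                 : "conditional branch to TBB, otherwise falls through";
    }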
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != SystemZ::JMP &&
+ getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "SystemZ branch conditions have at most one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
+ BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case SystemZCC::O: return get(SystemZ::JO);
+ case SystemZCC::H: return get(SystemZ::JH);
+ case SystemZCC::NLE: return get(SystemZ::JNLE);
+ case SystemZCC::L: return get(SystemZ::JL);
+ case SystemZCC::NHE: return get(SystemZ::JNHE);
+ case SystemZCC::LH: return get(SystemZ::JLH);
+ case SystemZCC::NE: return get(SystemZ::JNE);
+ case SystemZCC::E: return get(SystemZ::JE);
+ case SystemZCC::NLH: return get(SystemZ::JNLH);
+ case SystemZCC::HE: return get(SystemZ::JHE);
+ case SystemZCC::NL: return get(SystemZ::JNL);
+ case SystemZCC::LE: return get(SystemZ::JLE);
+ case SystemZCC::NH: return get(SystemZ::JNH);
+ case SystemZCC::NO: return get(SystemZ::JNO);
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: return SystemZCC::INVALID;
+ case SystemZ::JO: return SystemZCC::O;
+ case SystemZ::JH: return SystemZCC::H;
+ case SystemZ::JNLE: return SystemZCC::NLE;
+ case SystemZ::JL: return SystemZCC::L;
+ case SystemZ::JNHE: return SystemZCC::NHE;
+ case SystemZ::JLH: return SystemZCC::LH;
+ case SystemZ::JNE: return SystemZCC::NE;
+ case SystemZ::JE: return SystemZCC::E;
+ case SystemZ::JNLH: return SystemZCC::NLH;
+ case SystemZ::JHE: return SystemZCC::HE;
+ case SystemZ::JNL: return SystemZCC::NL;
+ case SystemZ::JLE: return SystemZCC::LE;
+ case SystemZ::JNH: return SystemZCC::NH;
+ case SystemZ::JNO: return SystemZCC::NO;
+ }
+}
+
+SystemZCC::CondCodes
+SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid condition!");
+ case SystemZCC::O: return SystemZCC::NO;
+ case SystemZCC::H: return SystemZCC::NH;
+ case SystemZCC::NLE: return SystemZCC::LE;
+ case SystemZCC::L: return SystemZCC::NL;
+ case SystemZCC::NHE: return SystemZCC::HE;
+ case SystemZCC::LH: return SystemZCC::NLH;
+ case SystemZCC::NE: return SystemZCC::E;
+ case SystemZCC::E: return SystemZCC::NE;
+ case SystemZCC::NLH: return SystemZCC::LH;
+ case SystemZCC::HE: return SystemZCC::NHE;
+ case SystemZCC::NL: return SystemZCC::L;
+ case SystemZCC::LE: return SystemZCC::NLE;
+ case SystemZCC::NH: return SystemZCC::H;
+ case SystemZCC::NO: return SystemZCC::O;
+ }
+}
+
+const MCInstrDesc&
+SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Don't have long disp version of this instruction");
+ case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
+ case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
+ case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
+ case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
+ case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
+ case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
+ case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
+ case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
+ case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
+ case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
+ case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
+ case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
+ case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
+ case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
+ case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
+ case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
+ case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
+ case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 000000000000..6a31e9496365
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,113 @@
+//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+/// SystemZII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SystemZII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // SystemZ Specific MachineOperand flags.
+
+ MO_NO_FLAG = 0,
+
+ /// MO_GOTENT - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// SYMBOL_LABEL @GOTENT
+ MO_GOTENT = 1,
+
+    /// MO_PLT - On a symbol operand this indicates that the immediate is the
+    /// offset to the PLT entry of the symbol name from the current code location.
+ ///
+ /// SYMBOL_LABEL @PLT
+ MO_PLT = 2
+ };
+}
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+ const SystemZRegisterInfo RI;
+ SystemZTargetMachine &TM;
+public:
+ explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
+ SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
+ const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+ const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
+
+ const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+ if (Offset < 0 || Offset >= 4096)
+ return getLongDispOpc(Opc);
+ else
+ return get(Opc);
+ }
+};
+
+}
+
+#endif
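An illustrative sketch, not part of the patch: getMemoryInstr above keeps the base opcode only while the offset fits the unsigned 12-bit displacement range [0, 4095] and otherwise switches to the long-displacement opcode returned by getLongDispOpc (e.g. MOV32mr/"st" versus MOV32mry/"sty"). The same rule in isolation:

    #include <cstdint>

    // True when a memory offset forces the long-displacement ('y') opcode.
    static inline bool needsLongDisplacement(int64_t Offset) {
      return Offset < 0 || Offset >= 4096; // outside the unsigned 12-bit range
    }
    // needsLongDisplacement(4000) == false, needsLongDisplacement(8000) == true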
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 000000000000..11a39fcd023a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,1147 @@
+//===- SystemZInstrInfo.td - SystemZ Instruction defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Instruction Predicate Definitions.
+def IsZ10 : Predicate<"Subtarget.isZ10()">;
+
+include "SystemZInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
+class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
+def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
+def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisI8<1>, SDTCisVT<2, i64>]>;
+def SDT_SelectCC : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisI8<3>, SDTCisVT<4, i64>]>;
+def SDT_Address : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def SystemZcallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def SystemZcallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
+def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
+ [SDNPHasChain]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
+def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
+
+
+include "SystemZOperands.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(SystemZcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
+
+let Uses = [PSW], usesCustomInserter = 1 in {
+ def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
+ "# Select32 PSEUDO",
+ [(set GR32:$dst,
+ (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
+ def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
+ "# Select64 PSEUDO",
+ [(set GR64:$dst,
+ (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
+
+ let isIndirectBranch = 1 in
+ def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
+ }
+
+ let Uses = [PSW] in {
+ def JO : Pseudo<(outs), (ins brtarget:$dst),
+ "jo\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
+ def JH : Pseudo<(outs), (ins brtarget:$dst),
+ "jh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
+ def JNLE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
+ def JL : Pseudo<(outs), (ins brtarget:$dst),
+ "jl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
+ def JNHE: Pseudo<(outs), (ins brtarget:$dst),
+ "jnhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
+ def JLH : Pseudo<(outs), (ins brtarget:$dst),
+ "jlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
+ def JNE : Pseudo<(outs), (ins brtarget:$dst),
+ "jne\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
+ def JE : Pseudo<(outs), (ins brtarget:$dst),
+ "je\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
+ def JNLH: Pseudo<(outs), (ins brtarget:$dst),
+ "jnlh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
+ def JHE : Pseudo<(outs), (ins brtarget:$dst),
+ "jhe\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
+ def JNL : Pseudo<(outs), (ins brtarget:$dst),
+ "jnl\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
+ def JLE : Pseudo<(outs), (ins brtarget:$dst),
+ "jle\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
+ def JNH : Pseudo<(outs), (ins brtarget:$dst),
+ "jnh\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
+ def JNO : Pseudo<(outs), (ins brtarget:$dst),
+ "jno\t$dst",
+ [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
+ } // Uses = [PSW]
+} // isBranch = 1
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. Uses for argument
+ // registers are added manually.
+ let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+ F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
+ def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
+ "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
+ "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
+ }
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let isReMaterializable = 1 in
+// FIXME: Provide imm12 variant
+// FIXME: Address should be halfword aligned...
+def LA64r : RXI<0x47,
+ (outs GR64:$dst), (ins laaddr:$src),
+ "lay\t{$dst, $src}",
+ [(set GR64:$dst, laaddr:$src)]>;
+def LA64rm : RXYI<0x71E3,
+ (outs GR64:$dst), (ins i64imm:$src),
+ "larl\t{$dst, $src}",
+ [(set GR64:$dst,
+ (SystemZpcrelwrapper tglobaladdr:$src))]>;
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "# no-op", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+let neverHasSideEffects = 1 in {
+def MOV32rr : RRI<0x18,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lr\t{$dst, $src}",
+ []>;
+def MOV64rr : RREI<0xB904,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgr\t{$dst, $src}",
+ []>;
+def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
+ "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
+ []>;
+}
+
+def MOVSX64rr32 : RREI<0xB914,
+ (outs GR64:$dst), (ins GR32:$src),
+ "lgfr\t{$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVZX64rr32 : RREI<0xB916,
+ (outs GR64:$dst), (ins GR32:$src),
+ "llgfr\t{$dst, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV32ri16 : RII<0x8A7,
+ (outs GR32:$dst), (ins s16imm:$src),
+ "lhi\t{$dst, $src}",
+ [(set GR32:$dst, immSExt16:$src)]>;
+def MOV64ri16 : RII<0x9A7,
+ (outs GR64:$dst), (ins s16imm64:$src),
+ "lghi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt16:$src)]>;
+
+def MOV64rill16 : RII<0xFA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llill\t{$dst, $src}",
+ [(set GR64:$dst, i64ll16:$src)]>;
+def MOV64rilh16 : RII<0xEA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llilh\t{$dst, $src}",
+ [(set GR64:$dst, i64lh16:$src)]>;
+def MOV64rihl16 : RII<0xDA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihl\t{$dst, $src}",
+ [(set GR64:$dst, i64hl16:$src)]>;
+def MOV64rihh16 : RII<0xCA5,
+ (outs GR64:$dst), (ins u16imm:$src),
+ "llihh\t{$dst, $src}",
+ [(set GR64:$dst, i64hh16:$src)]>;
+
+def MOV64ri32 : RILI<0x1C0,
+ (outs GR64:$dst), (ins s32imm64:$src),
+ "lgfi\t{$dst, $src}",
+ [(set GR64:$dst, immSExt32:$src)]>;
+def MOV64rilo32 : RILI<0xFC0,
+ (outs GR64:$dst), (ins u32imm:$src),
+ "llilf\t{$dst, $src}",
+ [(set GR64:$dst, i64lo32:$src)]>;
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
+ "llihf\t{$dst, $src}",
+ [(set GR64:$dst, i64hi32:$src)]>;
+}
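An illustrative sketch, not part of the patch: the llill/llilh/llihl/llihh moves above each load one 16-bit slice of the 64-bit register (ll = bits 0-15, lh = bits 16-31, hl = bits 32-47, hh = bits 48-63, counting from the least significant bit), while llilf/llihf load the low or high 32 bits; the i64ll16/i64lh16/... operand predicates match immediates that fit entirely inside the corresponding slice. A hypothetical helper extracting such a slice:

    #include <cstdint>

    // Index 0 = ll (bits 0-15), 1 = lh (16-31), 2 = hl (32-47), 3 = hh (48-63).
    static inline uint16_t halfwordSlice(uint64_t Imm, unsigned Index) {
      return uint16_t(Imm >> (16 * Index));
    }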
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV32rm : RXI<0x58,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "l\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr12:$src))]>;
+def MOV32rmy : RXYI<0x58E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "ly\t{$dst, $src}",
+ [(set GR32:$dst, (load rriaddr:$src))]>;
+def MOV64rm : RXYI<0x04E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lg\t{$dst, $src}",
+ [(set GR64:$dst, (load rriaddr:$src))]>;
+def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tl\t${dst:subreg_odd}, $src\n"
+ "\tl\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr12:$src))]>;
+def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tly\t${dst:subreg_odd}, $src\n"
+ "\tly\t${dst:subreg_even}, 4+$src",
+ [(set GR64P:$dst, (load rriaddr:$src))]>;
+def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tlg\t${dst:subreg_odd}, $src\n"
+ "\tlg\t${dst:subreg_even}, 8+$src",
+ [(set GR128:$dst, (load rriaddr:$src))]>;
+}
+
+def MOV32mr : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "st\t{$src, $dst}",
+ [(store GR32:$src, rriaddr12:$dst)]>;
+def MOV32mry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sty\t{$src, $dst}",
+ [(store GR32:$src, rriaddr:$dst)]>;
+def MOV64mr : RXYI<0x24E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stg\t{$src, $dst}",
+ [(store GR64:$src, rriaddr:$dst)]>;
+def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tst\t${src:subreg_odd}, $dst\n"
+ "\tst\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr12:$dst)]>;
+def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
+ "# MOV64P PSEUDO!\n"
+ "\tsty\t${src:subreg_odd}, $dst\n"
+ "\tsty\t${src:subreg_even}, 4+$dst",
+ [(store GR64P:$src, rriaddr:$dst)]>;
+def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
+ "# MOV128 PSEUDO!\n"
+ "\tstg\t${src:subreg_odd}, $dst\n"
+ "\tstg\t${src:subreg_even}, 8+$dst",
+ [(store GR128:$src, rriaddr:$dst)]>;
+
+def MOV8mi : SII<0x92,
+ (outs), (ins riaddr12:$dst, i32i8imm:$src),
+ "mvi\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
+def MOV8miy : SIYI<0x52EB,
+ (outs), (ins riaddr:$dst, i32i8imm:$src),
+ "mviy\t{$dst, $src}",
+ [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
+
+let AddedComplexity = 2 in {
+def MOV16mi : SILI<0xE544,
+ (outs), (ins riaddr12:$dst, s16imm:$src),
+ "mvhhi\t{$dst, $src}",
+ [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV32mi16 : SILI<0xE54C,
+ (outs), (ins riaddr12:$dst, s32imm:$src),
+ "mvhi\t{$dst, $src}",
+ [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+def MOV64mi16 : SILI<0xE548,
+ (outs), (ins riaddr12:$dst, s32imm64:$src),
+ "mvghi\t{$dst, $src}",
+ [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
+ Requires<[IsZ10]>;
+}
+
+// sexts
+def MOVSX32rr8 : RREI<0xB926,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lbr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
+def MOVSX64rr8 : RREI<0xB906,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lgbr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
+def MOVSX32rr16 : RREI<0xB927,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lhr\t{$dst, $src}",
+ [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
+def MOVSX64rr16 : RREI<0xB907,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lghr\t{$dst, $src}",
+ [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
+
+// extloads
+def MOVSX32rm8 : RXYI<0x76E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lb\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
+def MOVSX32rm16 : RXI<0x48,
+ (outs GR32:$dst), (ins rriaddr12:$src),
+ "lh\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
+def MOVSX32rm16y : RXYI<0x78E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "lhy\t{$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
+def MOVSX64rm8 : RXYI<0x77E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgb\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
+def MOVSX64rm16 : RXYI<0x15E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgh\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
+def MOVSX64rm32 : RXYI<0x14E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "lgf\t{$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
+
+def MOVZX32rm8 : RXYI<0x94E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llc\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
+def MOVZX32rm16 : RXYI<0x95E3,
+ (outs GR32:$dst), (ins rriaddr:$src),
+ "llh\t{$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
+def MOVZX64rm8 : RXYI<0x90E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgc\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
+def MOVZX64rm16 : RXYI<0x91E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgh\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
+def MOVZX64rm32 : RXYI<0x16E3,
+ (outs GR64:$dst), (ins rriaddr:$src),
+ "llgf\t{$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
+
+// truncstores
+def MOV32m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
+
+def MOV32m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR32:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
+
+def MOV32m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR32:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
+
+def MOV64m8r : RXI<0x42,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "stc\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m8ry : RXYI<0x72E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "stcy\t{$src, $dst}",
+ [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m16r : RXI<0x40,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "sth\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m16ry : RXYI<0x70E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sthy\t{$src, $dst}",
+ [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
+
+def MOV64m32r : RXI<0x50,
+ (outs), (ins rriaddr12:$dst, GR64:$src),
+ "st\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
+
+def MOV64m32ry : RXYI<0x50E3,
+ (outs), (ins rriaddr:$dst, GR64:$src),
+ "sty\t{$src, $dst}",
+ [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
+
+// Multiple-register moves
+// FIXME: should we use multiple arg nodes?
+def MOV32mrm : RSYI<0x90EB,
+ (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
+ "stmy\t{$from, $to, $dst}",
+ []>;
+def MOV64mrm : RSYI<0x24EB,
+ (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
+ "stmg\t{$from, $to, $dst}",
+ []>;
+def MOV32rmm : RSYI<0x90EB,
+ (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
+ "lmy\t{$from, $to, $dst}",
+ []>;
+def MOV64rmm : RSYI<0x04EB,
+ (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
+ "lmg\t{$from, $to, $dst}",
+ []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ Constraints = "$src = $dst" in {
+def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
+ "lhi\t${dst:subreg_even}, 0",
+ []>;
+def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
+ "lghi\t${dst:subreg_even}, 0",
+ []>;
+}
+
+// Byte swaps
+def BSWAP32rr : RREI<0xB91F,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lrvr\t{$dst, $src}",
+ [(set GR32:$dst, (bswap GR32:$src))]>;
+def BSWAP64rr : RREI<0xB90F,
+ (outs GR64:$dst), (ins GR64:$src),
+ "lrvgr\t{$dst, $src}",
+ [(set GR64:$dst, (bswap GR64:$src))]>;
+
+// FIXME: this is an invalid pattern for big-endian
+//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
+// "lrvh\t{$dst, $src}",
+// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
+def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
+ "lrv\t{$dst, $src}",
+ [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
+def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
+ "lrvg\t{$dst, $src}",
+ [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
+
+//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
+// "strvh\t{$src, $dst}",
+// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
+ "strv\t{$src, $dst}",
+ [(store (bswap GR32:$src), rriaddr:$dst)]>;
+def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
+ "strvg\t{$src, $dst}",
+ [(store (bswap GR64:$src), rriaddr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let Defs = [PSW] in {
+def NEG32rr : RRI<0x13,
+ (outs GR32:$dst), (ins GR32:$src),
+ "lcr\t{$dst, $src}",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit PSW)]>;
+def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
+ "lcgr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit PSW)]>;
+def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
+ "lcgfr\t{$dst, $src}",
+ [(set GR64:$dst, (ineg (sext GR32:$src))),
+ (implicit PSW)]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+
+let Defs = [PSW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "ar\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "agr\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "a\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ay\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ag\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+
+def ADD32ri16 : RII<0xA7A,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "ahi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD32ri : RILI<0xC29,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "afi\t{$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit PSW)]>;
+def ADD64ri16 : RILI<0xA7B,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "aghi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
+ (implicit PSW)]>;
+def ADD64ri32 : RILI<0xC28,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "agfi\t{$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
+ (implicit PSW)]>;
+
+let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
+def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alr\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
+def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "algr\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
+}
+
+def ADC32ri : RILI<0xC2B,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "alfi\t{$dst, $src2}",
+ [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
+def ADC64ri32 : RILI<0xC2A,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "algfi\t{$dst, $src2}",
+ [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "alcr\t{$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "alcgr\t{$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND32rr : RRI<0x14,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "nr\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
+def AND64rr : RREI<0xB980,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ngr\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+}
+
+def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "n\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "ny\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def AND64rm : RXYI<0xE380, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "ng\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def AND32rill16 : RII<0xA57,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
+def AND64rill16 : RII<0xA57,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nill\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
+
+def AND32rilh16 : RII<0xA56,
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
+def AND64rilh16 : RII<0xA56,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nilh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
+
+def AND64rihl16 : RII<0xA55,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihl\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
+def AND64rihh16 : RII<0xA54,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "nihh\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
+
+def AND32ri : RILI<0xC0B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
+def AND64rilo32 : RILI<0xC0B,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nilf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
+def AND64rihi32 : RILI<0xC0A,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "nihf\t{$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR32rr : RRI<0x16,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
+def OR64rr : RREI<0xB981,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "ogr\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+}
+
+def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "o\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "oy\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "og\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+ // FIXME: Provide proper encoding!
+def OR32ri16 : RII<0xA5B,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
+def OR32ri16h : RII<0xA5A,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
+def OR32ri : RILI<0xC0D,
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
+
+def OR64rill16 : RII<0xA5B,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oill\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
+def OR64rilh16 : RII<0xA5A,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oilh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
+def OR64rihl16 : RII<0xA59,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihl\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
+def OR64rihh16 : RII<0xA58,
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
+ "oihh\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
+
+def OR64rilo32 : RILI<0xC0D,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oilf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
+def OR64rihi32 : RILI<0xC0C,
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
+ "oihf\t{$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
+
+def SUB32rr : RRI<0x1B,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "sr\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+def SUB64rr : RREI<0xB909,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "s\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "sy\t{$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "sg\t{$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def SBC32rr : RRI<0x1F,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slr\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
+def SBC64rr : RREI<0xB90B,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slgr\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
+
+def SBC32ri : RILI<0xC25,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "sllfi\t{$dst, $src2}",
+ [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
+def SBC64ri32 : RILI<0xC24,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "slgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
+
+let Uses = [PSW] in {
+def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "slbr\t{$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
+ (implicit PSW)]>;
+def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "slbgr\t{$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
+ (implicit PSW)]>;
+}
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR32rr : RRI<0x17,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xr\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+def XOR64rr : RREI<0xB982,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xgr\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+}
+
+def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "x\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
+ (implicit PSW)]>;
+def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "xy\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "xg\t{$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
+ (implicit PSW)]>;
+
+def XOR32ri : RILI<0xC07,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xilf\t{$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
+
+} // Defs = [PSW]
+
+let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
+def MUL32rr : RREI<0xB252,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "msr\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
+def MUL64rr : RREI<0xB90C,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "msgr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
+}
+
+def MUL64rrP : RRI<0x1C,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mr\t{$dst, $src2}",
+ []>;
+def UMUL64rrP : RREI<0xB996,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "mlr\t{$dst, $src2}",
+ []>;
+def UMUL128rrP : RREI<0xB986,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "mlgr\t{$dst, $src2}",
+ []>;
+
+def MUL32ri16 : RII<0xA7C,
+ (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
+ "mhi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
+def MUL64ri16 : RII<0xA7D,
+ (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
+ "mghi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
+
+let AddedComplexity = 2 in {
+def MUL32ri : RILI<0xC21,
+ (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
+ "msfi\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
+ Requires<[IsZ10]>;
+def MUL64ri32 : RILI<0xC20,
+ (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
+ "msgfi\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
+ Requires<[IsZ10]>;
+}
+
+def MUL32rm : RXI<0x71,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
+ "ms\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
+def MUL32rmy : RXYI<0xE351,
+ (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
+ "msy\t{$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
+def MUL64rm : RXYI<0xE30C,
+ (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
+ "msg\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
+
+def MULSX64rr32 : RREI<0xB91C,
+ (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
+ "msgfr\t{$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
+
+def SDIVREM32r : RREI<0xB91D,
+ (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
+ "dsgfr\t{$dst, $src2}",
+ []>;
+def SDIVREM64r : RREI<0xB90D,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dsgr\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32r : RREI<0xB997,
+ (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
+ "dlr\t{$dst, $src2}",
+ []>;
+def UDIVREM64r : RREI<0xB987,
+ (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
+ "dlgr\t{$dst, $src2}",
+ []>;
+let mayLoad = 1 in {
+def SDIVREM32m : RXYI<0xE31D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsgf\t{$dst, $src2}",
+ []>;
+def SDIVREM64m : RXYI<0xE30D,
+ (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dsg\t{$dst, $src2}",
+ []>;
+
+def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
+ "dl\t{$dst, $src2}",
+ []>;
+def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
+ "dlg\t{$dst, $src2}",
+ []>;
+} // mayLoad
+} // Constraints = "$src1 = $dst"
+
+//===----------------------------------------------------------------------===//
+// Shifts
+
+let Constraints = "$src = $dst" in
+def SRL32rri : RSI<0x88,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "srl\t{$src, $amt}",
+ [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
+def SRL64rri : RSYI<0xEB0C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srlg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
+
+let Constraints = "$src = $dst" in
+def SHL32rri : RSI<0x89,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sll\t{$src, $amt}",
+ [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
+def SHL64rri : RSYI<0xEB0D,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "sllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
+
+let Defs = [PSW] in {
+let Constraints = "$src = $dst" in
+def SRA32rri : RSI<0x8A,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "sra\t{$src, $amt}",
+ [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
+ (implicit PSW)]>;
+
+def SRA64rri : RSYI<0xEB0A,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "srag\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
+ (implicit PSW)]>;
+} // Defs = [PSW]
+
+def ROTL32rri : RSYI<0xEB1D,
+ (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
+ "rll\t{$dst, $src, $amt}",
+ [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
+def ROTL64rri : RSYI<0xEB1C,
+ (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
+ "rllg\t{$dst, $src, $amt}",
+ [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions (set PSW but do not produce any register result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def CMP32rr : RRI<0x19,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
+def CMP64rr : RREI<0xB920,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "cgr\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
+
+def CMP32ri : RILI<0xC2D,
+ (outs), (ins GR32:$src1, s32imm:$src2),
+ "cfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
+def CMP64ri32 : RILI<0xC2C,
+ (outs), (ins GR64:$src1, s32imm64:$src2),
+ "cgfi\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
+
+def CMP32rm : RXI<0x59,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "c\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
+def CMP32rmy : RXYI<0xE359,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cy\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
+def CMP64rm : RXYI<0xE320,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cg\t$src1, $src2",
+ [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
+
+def UCMP32rr : RRI<0x15,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "clr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
+def UCMP64rr : RREI<0xB921,
+ (outs), (ins GR64:$src1, GR64:$src2),
+ "clgr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
+
+def UCMP32ri : RILI<0xC2F,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "clfi\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
+def UCMP64ri32 : RILI<0xC2E,
+ (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "clgfi\t$src1, $src2",
+ [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
+
+def UCMP32rm : RXI<0x55,
+ (outs), (ins GR32:$src1, rriaddr12:$src2),
+ "cl\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr12:$src2)))]>;
+def UCMP32rmy : RXYI<0xE355,
+ (outs), (ins GR32:$src1, rriaddr:$src2),
+ "cly\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr:$src2)))]>;
+def UCMP64rm : RXYI<0xE351,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clg\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (load rriaddr:$src2)))]>;
+
+def CMPSX64rr32 : RREI<0xB930,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "cgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sext GR32:$src2)))]>;
+def UCMPZX64rr32 : RREI<0xB931,
+ (outs), (ins GR64:$src1, GR32:$src2),
+ "clgfr\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zext GR32:$src2)))]>;
+
+def CMPSX64rm32 : RXYI<0xE330,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "cgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sextloadi64i32 rriaddr:$src2)))]>;
+def UCMPZX64rm32 : RXYI<0xE331,
+ (outs), (ins GR64:$src1, rriaddr:$src2),
+ "clgf\t$src1, $src2",
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zextloadi64i32 rriaddr:$src2)))]>;
+
+// FIXME: Add other crazy ucmp forms
+
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Other crazy stuff
+let Defs = [PSW] in {
+def FLOGR64 : RREI<0xB983,
+ (outs GR128:$dst), (ins GR64:$src),
+ "flogr\t{$dst, $src}",
+ []>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+// ConstPools, JumpTables
+def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
+def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
+
+// anyext
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// calls
+def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
+def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// FIXME: use add/sub tricks with 32768/-32768
+
+// Arbitrary immediate support.
+def : Pat<(i32 imm:$src),
+ (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))),
+ subreg_32bit)>;
+
+// Implement in terms of LLIHF/OILF.
+def : Pat<(i64 imm:$imm),
+ (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
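+// For illustration: the 64-bit immediate 0x0000000100000002 would be
+// materialized as MOV64rihi32 0x00000001 followed by OR64rilo32 0x00000002,
+// i.e. an llihf/oilf pair.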
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// extload patterns
+def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
+def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
+def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
+
+// muls
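+// The patterns below synthesize the high half of a product from the widening
+// multiplies: the first operand is inserted into the odd half of an even/odd
+// register pair, the pair-wide multiply leaves the full product in the pair,
+// and the high half is then extracted from the even half of the pair
+// (subreg_32bit / subreg_even).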
+def : Pat<(mulhs GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+
+def : Pat<(mulhu GR32:$src1, GR32:$src2),
+ (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ GR32:$src1, subreg_odd32),
+ GR32:$src2),
+ subreg_32bit)>;
+def : Pat<(mulhu GR64:$src1, GR64:$src2),
+ (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ GR64:$src1, subreg_odd),
+ GR64:$src2),
+ subreg_even)>;
+
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
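+// FLOGR writes the position of the leftmost one bit (64 when the operand is
+// zero) into the even register of the result pair, which matches the ctlz
+// value; hence the subreg_even extraction above.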
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 000000000000..fd6e330344b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,51 @@
+//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares SystemZ-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private SystemZ target-specific information for each
+/// MachineFunction.
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// LowReg - Low register of range of callee-saved registers to store.
+ unsigned LowReg;
+
+ /// HighReg - High register of range of callee-saved registers to store.
+ unsigned HighReg;
+public:
+ SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getLowReg() const { return LowReg; }
+ void setLowReg(unsigned Reg) { LowReg = Reg; }
+
+ unsigned getHighReg() const { return HighReg; }
+ void setHighReg(unsigned Reg) { HighReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 000000000000..8b835cc26e29
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,325 @@
+//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various SystemZ instruction operands.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff.
+//===----------------------------------------------------------------------===//
+
+// SystemZ-specific condition codes. These correspond to CondCode in
+// SystemZ.h and must be kept in sync with it.
+def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
+def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
+def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
+def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
+def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
+def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
+def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
+def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
+def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
+def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
+def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
+def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
+def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
+def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
+
+def LO8 : SDNodeXForm<imm, [{
+ // Transformation function: return low 8 bits.
+ return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
+}]>;
+
+def LL16 : SDNodeXForm<imm, [{
+ // Transformation function: return low 16 bits.
+ return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
+}]>;
+
+def LH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 16-31.
+ return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
+}]>;
+
+def HL16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-47.
+ return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
+}]>;
+
+def HH16 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 48-63.
+ return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
+}]>;
+
+def LO32 : SDNodeXForm<imm, [{
+ // Transformation function: return low 32 bits.
+ return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
+}]>;
+
+def HI32 : SDNodeXForm<imm, [{
+ // Transformation function: return bits 32-63.
+ return getI32Imm(N->getZExtValue() >> 32);
+}]>;
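+// For illustration, applied to N = 0x0123456789ABCDEF the transforms above
+// yield LO8 = 0xEF, LL16 = 0xCDEF, LH16 = 0x89AB, HL16 = 0x4567,
+// HH16 = 0x0123, LO32 = 0x89ABCDEF and HI32 = 0x01234567.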
+
+def GetI64FromI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
+def i32ll16 : PatLeaf<(i32 imm), [{
+ // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16 : PatLeaf<(i32 imm), [{
+ // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i32ll16c : PatLeaf<(i32 imm), [{
+ // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
+ return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16c : PatLeaf<(i32 imm), [{
+ // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
+ // bits set.
+ return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64ll16 : PatLeaf<(i64 imm), [{
+ // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
+ // bits set.
+ return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16 : PatLeaf<(i64 imm), [{
+ // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
+ return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16 : PatLeaf<(i64 imm), [{
+ // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
+ return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16 : PatLeaf<(i64 imm), [{
+ // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
+ return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
+}], HH16>;
+
+def i64ll16c : PatLeaf<(i64 imm), [{
+ // i64ll16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) the rightmost 16 bits.
+ return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16c : PatLeaf<(i64 imm), [{
+ // i64lh16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 16-31.
+ return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16c : PatLeaf<(i64 imm), [{
+ // i64hl16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-47.
+ return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16c : PatLeaf<(i64 imm), [{
+ // i64hh16c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 48-63.
+ return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
+}], HH16>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
+ // field.
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int16_t)val);
+ } else if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((int32_t)val == (int16_t)val);
+ }
+
+ return false;
+}], LL16>;
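+// For example, 0xFFFFFFFFFFFF8000 (-32768) satisfies immSExt16, while
+// 0x0000000000008000 (32768) does not: sign-extending its low 16 bits gives
+// -32768 rather than 32768.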
+
+def immSExt32 : PatLeaf<(i64 imm), [{
+ // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
+ // field.
+ uint64_t val = N->getZExtValue();
+ return ((int64_t)val == (int32_t)val);
+}], LO32>;
+
+def i64lo32 : PatLeaf<(i64 imm), [{
+ // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+ // bits set.
+ return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32 : PatLeaf<(i64 imm), [{
+ // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+ return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], HI32>;
+
+def i64lo32c : PatLeaf<(i64 imm), [{
+ // i64lo32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) the rightmost 32 bits.
+ return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32c : PatLeaf<(i64 imm), [{
+ // i64hi32c predicate - true if the 64-bit immediate has all bits set
+ // except (possibly) bits 32-63.
+ return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], HI32>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}], LO8>;
+
+def i32immSExt16 : PatLeaf<(i32 imm), [{
+ // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
+ // sign extended field.
+ return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}], LL16>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}], LO32>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}], LO32>;
+
+// extloads
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+// A few more descriptive operand definitions.
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+// 32-bits but only 16 bits are significant.
+def i32i16imm : Operand<i32>;
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Unsigned i12
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+}
+def u12imm64 : Operand<i64> {
+ let PrintMethod = "printU12ImmOperand";
+}
+
+// Signed i16
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+// Unsigned i16
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+// Signed i20
+def s20imm : Operand<i32> {
+ let PrintMethod = "printS20ImmOperand";
+}
+def s20imm64 : Operand<i64> {
+ let PrintMethod = "printS20ImmOperand";
+}
+// Signed i32
+def s32imm : Operand<i32> {
+ let PrintMethod = "printS32ImmOperand";
+}
+def s32imm64 : Operand<i64> {
+ let PrintMethod = "printS32ImmOperand";
+}
+// Unsigned i32
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+def u32imm64 : Operand<i64> {
+ let PrintMethod = "printU32ImmOperand";
+}
+
+def imm_pcrel : Operand<i64> {
+ let PrintMethod = "printPCRelImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+
+// riaddr := reg + imm
+def riaddr32 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
+}
+
+def riaddr12 : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI12", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
+}
+
+def riaddr : Operand<i64>,
+ ComplexPattern<i64, 2, "SelectAddrRI", []> {
+ let PrintMethod = "printRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
+}
+
+//===----------------------------------------------------------------------===//
+
+// rriaddr := reg + reg + imm
+def rriaddr12 : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
+}
+def rriaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
+def laaddr : Operand<i64>,
+ ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
+ let PrintMethod = "printRRIAddrOperand";
+ let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
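+
+// Note (informal): with the printRIAddrOperand/printRRIAddrOperand methods
+// above, the ri forms print roughly as D(B) and the rri forms as D(X,B) in
+// SystemZ assembler syntax; the u12 variants restrict the displacement to an
+// unsigned 12-bit value, while the s20 variants allow a signed 20-bit one.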
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 000000000000..59692e883366
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,158 @@
+//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+ const SystemZInstrInfo &tii)
+ : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
+}
+
+const unsigned*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
+ SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+ SystemZ::R14D, SystemZ::R15D,
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10P);
+ Reserved.set(SystemZ::R10Q);
+ }
+
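+ // R14 (holding the return address) and R15 (the stack pointer) are always
+ // reserved, together with their 32-bit and register-pair aliases.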
+ Reserved.set(SystemZ::R14D);
+ Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R14W);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14P);
+ Reserved.set(SystemZ::R14Q);
+ return Reserved;
+}
+
+const TargetRegisterClass*
+SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ switch(Idx) {
+ // Exact sub-classes don't exist for the other sub-register indexes.
+ default: return 0;
+ case SystemZ::subreg_32bit:
+ if (B == SystemZ::ADDR32RegisterClass)
+ return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0;
+ return A;
+ }
+}
+
+void SystemZRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MBB.erase(I);
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+
+ // This must be part of an rri or ri operand memory reference. Replace the
+ // FrameIndex with the base register BasePtr and fold the frame offset into
+ // the displacement field.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Offset is either a 12-bit unsigned or a 20-bit signed integer.
+ // FIXME: handle "too long" displacements.
+ int Offset =
+ TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
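+
+ // For example, a frame-index offset of 160 combined with an original
+ // displacement of 8 yields Offset = 168, addressed off R15D (or R11D when a
+ // frame pointer is in use).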
+
+ // Pick the instruction form based on whether the displacement fits into a
+ // 12-bit zero-extended field.
+ MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
+
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+unsigned SystemZRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 000000000000..2e262e1acc30
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,64 @@
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+class Type;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+ SystemZTargetMachine &TM;
+ const SystemZInstrInfo &TII;
+
+ SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ const TargetRegisterClass*
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 000000000000..a24cbcf4ccd8
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,205 @@
+//===- SystemZRegisterInfo.td - The SystemZ Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+// We identify all our registers with a 4-bit ID, for consistency's sake.
+
+// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// GPR64 - One of the 16 64-bit general-purpose registers
+class GPR64<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// GPR128 - 8 even-odd register pairs
+class GPR128<bits<4> num, string n, list<Register> subregs,
+ list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+ let Aliases = aliases;
+}
+
+// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPRS<bits<4> num, string n> : SystemZReg<n> {
+ field bits<4> Num = num;
+}
+
+// FPRL - One of the 16 64-bit floating-point registers
+class FPRL<bits<4> num, string n, list<Register> subregs>
+ : SystemZRegWithSubregs<n, subregs> {
+ field bits<4> Num = num;
+}
+
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex;
+def subreg_odd32 : SubRegIndex;
+def subreg_even : SubRegIndex;
+def subreg_odd : SubRegIndex;
+}
+
+// General-purpose registers
+def R0W : GPR32< 0, "r0">;
+def R1W : GPR32< 1, "r1">;
+def R2W : GPR32< 2, "r2">;
+def R3W : GPR32< 3, "r3">;
+def R4W : GPR32< 4, "r4">;
+def R5W : GPR32< 5, "r5">;
+def R6W : GPR32< 6, "r6">;
+def R7W : GPR32< 7, "r7">;
+def R8W : GPR32< 8, "r8">;
+def R9W : GPR32< 9, "r9">;
+def R10W : GPR32<10, "r10">;
+def R11W : GPR32<11, "r11">;
+def R12W : GPR32<12, "r12">;
+def R13W : GPR32<13, "r13">;
+def R14W : GPR32<14, "r14">;
+def R15W : GPR32<15, "r15">;
+
+let SubRegIndices = [subreg_32bit] in {
+def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
+def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
+def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
+def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
+def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
+def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
+def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
+def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
+def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
+def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
+def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
+def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
+def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
+def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
+def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
+def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
+
+// Register pairs
+let SubRegIndices = [subreg_32bit, subreg_odd32] in {
+def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>;
+def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>;
+def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>;
+def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>;
+def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>;
+def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>;
+def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>;
+def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>;
+}
+
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
+def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>;
+def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>;
+def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>;
+def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>;
+def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>;
+def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>;
+def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>;
+def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>;
+}
+
+// Floating-point registers
+def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
+def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
+def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
+def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
+def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
+def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
+def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
+def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
+def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
+def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
+def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
+def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
+def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
+def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
+def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
+def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+
+let SubRegIndices = [subreg_32bit] in {
+def F0L : FPRL< 0, "f0", [F0S]>;
+def F1L : FPRL< 1, "f1", [F1S]>;
+def F2L : FPRL< 2, "f2", [F2S]>;
+def F3L : FPRL< 3, "f3", [F3S]>;
+def F4L : FPRL< 4, "f4", [F4S]>;
+def F5L : FPRL< 5, "f5", [F5S]>;
+def F6L : FPRL< 6, "f6", [F6S]>;
+def F7L : FPRL< 7, "f7", [F7S]>;
+def F8L : FPRL< 8, "f8", [F8S]>;
+def F9L : FPRL< 9, "f9", [F9S]>;
+def F10L : FPRL<10, "f10", [F10S]>;
+def F11L : FPRL<11, "f11", [F11S]>;
+def F12L : FPRL<12, "f12", [F12S]>;
+def F13L : FPRL<13, "f13", [F13S]>;
+def F14L : FPRL<14, "f14", [F14S]>;
+def F15L : FPRL<15, "f15", [F15S]>;
+}
+
+// Status register
+def PSW : SystemZReg<"psw">;
+
+/// Register classes.
+/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
+ (sequence "R%uW", 15, 6))>;
+
+/// Registers used to generate addresses. Everything except R0.
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
+
+def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))> {
+ let SubRegClasses = [(GR32 subreg_32bit)];
+}
+
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
+ let SubRegClasses = [(ADDR32 subreg_32bit)];
+}
+
+// Even-odd register pairs
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
+ R12P, R10P, R8P, R6P,
+ R14P)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
+}
+
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
+ R12Q, R10Q, R8Q, R6Q,
+ R14Q)> {
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
+ (GR64 subreg_even, subreg_odd)];
+}
+
+def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
+
+def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
+ let SubRegClasses = [(FP32 subreg_32bit)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..3eabcd24c598
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+using namespace llvm;
+
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 000000000000..1450401d0403
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ ~SystemZSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 000000000000..b3ed06639758
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,54 @@
+//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "SystemZ.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "z9";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
+
+/// True if accessing the GV requires an extra load.
+bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // An extra load is needed for all externally visible globals.
+ if (isDirectCall)
+ return false;
+
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 000000000000..55cfd80002bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,48 @@
+//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
+#define LLVM_TARGET_SystemZ_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ bool HasZ10Insts;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ ///
+ SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isZ10() const { return HasZ10Insts; }
+
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 000000000000..48298cc744e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,43 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZ.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+}
+
+/// SystemZTargetMachine ctor - Create an LP64 architecture model
+///
+SystemZTargetMachine::SystemZTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) {
+
+ if (getRelocationModel() == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+}
+
+bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createSystemZISelDag(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 000000000000..e40b556c0c3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,67 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
+
+#include "SystemZInstrInfo.h"
+#include "SystemZISelLowering.h"
+#include "SystemZFrameLowering.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// SystemZTargetMachine
+///
+class SystemZTargetMachine : public LLVMTargetMachine {
+ SystemZSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+public:
+ SystemZTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const SystemZTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // SystemZTargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 000000000000..8272b1188201
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
new file mode 100644
index 000000000000..a42ce548c895
--- /dev/null
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -0,0 +1,102 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMTarget.a, which implements target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeTarget(PassRegistry &Registry) {
+ initializeTargetDataPass(Registry);
+ initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+ initializeTarget(*unwrap(R));
+}
+
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new TargetData(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetData(*unwrap(TD)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian;
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize();
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
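+
+// A minimal usage sketch of the C bindings above (layout string chosen purely
+// for illustration; error handling omitted):
+//
+//   LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:64:64:64");
+//   unsigned PtrBytes = LLVMPointerSize(TD);   // 8
+//   LLVMDisposeTargetData(TD);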
diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 000000000000..a97b0e868989
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
+ TLOF = &TM.getTargetLowering()->getObjFileLowering();
+ TFI = TM.getFrameLowering();
+ TRI = TM.getRegisterInfo();
+ TFI->getInitialFrameState(InitialFrameState);
+}
diff --git a/contrib/llvm/lib/Target/TargetAsmLexer.cpp b/contrib/llvm/lib/Target/TargetAsmLexer.cpp
new file mode 100644
index 000000000000..d4893ff2521e
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetAsmLexer.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmLexer.h"
+using namespace llvm;
+
+TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {}
+TargetAsmLexer::~TargetAsmLexer() {}
diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp
new file mode 100644
index 000000000000..17d022a339e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetData.cpp
@@ -0,0 +1,589 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct, and then passed around by const&. None of the member functions
+// require modification of the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+
+// Register the TargetData pass.
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ const Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
+}
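+
+// Worked example: with ABI alignments of 1, 4 and 2 bytes for i8, i32 and
+// i16, the struct { i8, i32, i16 } gets member offsets 0, 4 and 8, a raw size
+// of 10 bytes, and a final StructSize of 12 after padding to the 4-byte
+// struct alignment.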
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// getInt - Get an integer ignoring errors.
+static unsigned getInt(StringRef R) {
+ unsigned Result = 0;
+ R.getAsInteger(10, Result);
+ return Result;
+}
+
+void TargetData::init(StringRef Desc) {
+ initializeTargetDataPass(*PassRegistry::getPassRegistry());
+
+ LayoutMap = 0;
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+
+ while (!Desc.empty()) {
+ std::pair<StringRef, StringRef> Split = Desc.split('-');
+ StringRef Token = Split.first;
+ Desc = Split.second;
+
+ if (Token.empty())
+ continue;
+
+ Split = Token.split(':');
+ StringRef Specifier = Split.first;
+ Token = Split.second;
+
+ assert(!Specifier.empty() && "Can't be empty here");
+
+ switch (Specifier[0]) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ Split = Token.split(':');
+ PointerMemSize = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerABIAlign = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerPrefAlign = getInt(Split.first) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum AlignType;
+ switch (Specifier[0]) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
+ }
+ unsigned Size = getInt(Specifier.substr(1));
+ Split = Token.split(':');
+ unsigned ABIAlign = getInt(Split.first) / 8;
+
+ Split = Split.second.split(':');
+ unsigned PrefAlign = getInt(Split.first) / 8;
+ if (PrefAlign == 0)
+ PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+ break;
+ }
+ case 'n': // Native integer types.
+ Specifier = Specifier.substr(1);
+ do {
+ if (unsigned Width = getInt(Specifier))
+ LegalIntWidths.push_back(Width);
+ Split = Token.split(':');
+ Specifier = Split.first;
+ Token = Split.second;
+ } while (!Specifier.empty() || !Token.empty());
+ break;
+
+ default:
+ break;
+ }
+ }
+}
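+
+// A minimal usage sketch (illustrative only; the layout string is an example,
+// not taken from any particular target): feeding a little-endian 64-bit
+// description through the parser above and querying a few of the parsed values.
+//
+//   TargetData TD("e-p:64:64:64-i64:64:64-f80:128:128-n8:16:32:64");
+//   assert(TD.isLittleEndian());
+//   assert(TD.getPointerSize() == 8);          // bytes, from "p:64:..."
+//   assert(TD.getPointerABIAlignment() == 8);  // bytes, from "p:...:64:..."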
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetData::TargetData() : ImmutablePass(ID) {
+ report_fatal_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
+}
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass(ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ const Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on most conservative.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = llvm::NextPowerOf2(Align);
+ return Align;
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
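+
+// Illustrative behavior of the fallback logic above, assuming only the default
+// table from init(): a query for i24 has no exact entry, so the smallest wider
+// integer entry (i32) is used; a query for i128 is wider than every entry, so
+// it falls back to the largest integer entry (i64).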
+
+namespace {
+
+class StructLayoutMap {
+ typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
+ LayoutInfoTy LayoutInfo;
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
+ StructLayout *Value = I->second;
+ Value->~StructLayout();
+ free(Value);
+ }
+ }
+
+ StructLayout *&operator[](const StructType *STy) {
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end anonymous namespace
+
+TargetData::~TargetData() {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+}
+
+const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+ if (!LayoutMap)
+ LayoutMap = new StructLayoutMap();
+
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to the layout map, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+
+ return L;
+}
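+
+// Usage sketch (illustrative; assumes an LLVMContext Ctx and a TargetData TD
+// carrying the default alignments above):
+//
+//   Type *Elts[] = { Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx) };
+//   StructType *STy = StructType::get(Ctx, Elts);
+//   const StructLayout *SL = TD.getStructLayout(STy);
+//   uint64_t Size = SL->getSizeInBytes();     // 8: i8 at 0, i32 padded to 4
+//   uint64_t Off  = SL->getElementOffset(1);  // 4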
+
+std::string TargetData::getStringRepresentation() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E")
+ << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
+ << ':' << PointerPrefAlign*8;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const TargetAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
+ }
+ return OS.str();
+}
+
+
+uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSizeInBits();
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::VoidTyID:
+ return 8;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ case Type::X86_MMXTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID:
+ return cast<VectorType>(Ty)->getBitWidth();
+ default:
+ llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::X86_MMXTyID:
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ llvm_unreachable("Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+/// an integer type of the specified bitwidth.
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+}
+
+
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an integer type that is the same width as the
+/// target's pointer type.
+IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+ return IntegerType::get(C, getPointerSizeInBits());
+}
+
+
+uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
+ unsigned NumIndices) const {
+ const Type *Ty = ptrTy;
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
+ for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
+ if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
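+
+// Worked example (illustrative): for a GEP such as
+//   getelementptr { i32, [4 x i16] }* %p, i32 0, i32 1, i32 2
+// the loop above computes 4 (offset of field 1) + 2 * 2 (third i16 element),
+// i.e. 8 bytes, given the default alignments from init().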
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
+ const Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
+
+ if (GV->hasInitializer() && GVAlignment == 0) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
new file mode 100644
index 000000000000..a661ee9c0c65
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+ is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
+}
+
+TargetELFWriterInfo::~TargetELFWriterInfo() {}
+
diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
new file mode 100644
index 000000000000..19fd581c7dd5
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Default is to do nothing.
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
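+
+// Worked example (illustrative numbers): for an object at offset -8 in a frame
+// with a stack size of 32, a local-area offset of 0 and no offset adjustment,
+// the displacement returned above is -8 + 32 - 0 + 0 = 24.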
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
diff --git a/contrib/llvm/lib/Target/TargetInstrInfo.cpp b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..d52ecb32cf75
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,174 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo
+//===----------------------------------------------------------------------===//
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(*Str)) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
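+
+// Illustrative sketch (the maximum instruction length is a made-up value): for
+// the string "movl %eax, %ebx\n\taddl $1, %ebx", with newline as the separator
+// and an MCAsmInfo reporting a maximum instruction length of 4, the loop above
+// counts two instructions and returns 8.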
diff --git a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 000000000000..e049a1d3b62f
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,30 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo() {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
+
+unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const {
+ const ValueName *ValName = F->getValueName();
+ if (!ValName)
+ return 0;
+ return lookupName(ValName->getKeyData(), ValName->getKeyLength());
+}
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 000000000000..709dfd283f98
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,74 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.getOS() == Triple::IOS) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+}
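+
+// Usage sketch (illustrative; the triple string is just an example): on a
+// modern Darwin triple memset_pattern16 remains available, so a client can
+// test for it before emitting a call:
+//
+//   TargetLibraryInfo TLI(Triple("x86_64-apple-darwin10"));
+//   if (TLI.has(LibFunc::memset_pattern16)) {
+//     // ... it is safe to form a call to memset_pattern16 here ...
+//   }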
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple());
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T);
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
+ : ImmutablePass(ID) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+}
+
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
new file mode 100644
index 000000000000..703431b3806e
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -0,0 +1,360 @@
+//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Code
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFile::TargetLoweringObjectFile() :
+ Ctx(0),
+ TextSection(0),
+ DataSection(0),
+ BSSSection(0),
+ ReadOnlySection(0),
+ StaticCtorSection(0),
+ StaticDtorSection(0),
+ LSDASection(0),
+ CompactUnwindSection(0),
+ DwarfAbbrevSection(0),
+ DwarfInfoSection(0),
+ DwarfLineSection(0),
+ DwarfFrameSection(0),
+ DwarfPubNamesSection(0),
+ DwarfPubTypesSection(0),
+ DwarfDebugInlineSection(0),
+ DwarfStrSection(0),
+ DwarfLocSection(0),
+ DwarfARangesSection(0),
+ DwarfRangesSection(0),
+ DwarfMacroInfoSection(0),
+ TLSExtraDataSection(0),
+ CommDirectiveSupportsAlignment(true),
+ SupportsWeakOmittedEHFrame(true),
+ IsFunctionEHFrameSymbolPrivate(true) {
+}
+
+TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ const Constant *C = GV->getInitializer();
+
+ // Must have zero initializer.
+ if (!C->isNullValue())
+ return false;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared.
+ if (GV->isConstant())
+ return false;
+
+ // If the global has an explicit section specified, don't put it in BSS.
+ if (!GV->getSection().empty())
+ return false;
+
+ // If -nozero-initialized-in-bss is specified, don't ever use BSS.
+ if (NoZerosInBSS)
+ return false;
+
+ // Otherwise, put it in BSS!
+ return true;
+}
+
+/// IsNullTerminatedString - Return true if the specified constant (which is
+/// known to have a type that is an array of 1/2/4 byte elements) ends with a
+/// nul value and contains no other nuls in it.
+static bool IsNullTerminatedString(const Constant *C) {
+ const ArrayType *ATy = cast<ArrayType>(C->getType());
+
+ // First check: do we have a constant array of i8 terminated with a zero?
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
+ if (ATy->getNumElements() == 0) return false;
+
+ ConstantInt *Null =
+ dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
+ if (Null == 0 || !Null->isZero())
+ return false; // Not null terminated.
+
+ // Verify that the null doesn't occur anywhere else in the string.
+ for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
+ // Reject constantexpr elements etc.
+ if (!isa<ConstantInt>(CVA->getOperand(i)) ||
+ CVA->getOperand(i) == Null)
+ return false;
+ return true;
+ }
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C))
+ return ATy->getNumElements() == 1;
+
+ return false;
+}
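+
+// For example (illustrative IR): the initializer c"hello\00" (a [6 x i8] array
+// whose last element is zero and which contains no interior nul) is accepted;
+// c"he\00llo\00" is rejected because of the embedded nul; and
+// [1 x i8] zeroinitializer is accepted by the second check.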
+
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+}
+
+
+/// getKindForGlobal - This is a top-level target-independent classifier for
+/// a global variable. Given a global variable and information from TM, it
+/// classifies the global in a variety of ways that make various target
+/// implementations simpler. The target implementation is free to ignore this
+/// extra info of course.
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+ const TargetMachine &TM){
+ assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ "Can only be used for global definitions");
+
+ Reloc::Model ReloModel = TM.getRelocationModel();
+
+ // Early exit - functions should always be in text sections.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar == 0)
+ return SectionKind::getText();
+
+ // Handle thread-local data first.
+ if (GVar->isThreadLocal()) {
+ if (isSuitableForBSS(GVar))
+ return SectionKind::getThreadBSS();
+ return SectionKind::getThreadData();
+ }
+
+ // Variables with common linkage always get classified as common.
+ if (GVar->hasCommonLinkage())
+ return SectionKind::getCommon();
+
+ // This variable can easily be put into a BSS section.
+ if (isSuitableForBSS(GVar)) {
+ if (GVar->hasLocalLinkage())
+ return SectionKind::getBSSLocal();
+ else if (GVar->hasExternalLinkage())
+ return SectionKind::getBSSExtern();
+ return SectionKind::getBSS();
+ }
+
+ const Constant *C = GVar->getInitializer();
+
+ // If the global is marked constant, we can put it into a mergable section,
+ // a mergable string section, or general .data if it contains relocations.
+ if (GVar->isConstant()) {
+ // If the initializer for the global contains something that requires a
+ // relocation, then we may have to drop this into a writable data section
+ // even though it is marked const.
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ // If the global is required to have a unique address, it can't be put
+ // into a mergable section: just drop it into the general read-only
+ // section instead.
+ if (!GVar->hasUnnamedAddr())
+ return SectionKind::getReadOnly();
+
+ // If initializer is a null-terminated string, put it in a "cstring"
+ // section of the right width.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (const IntegerType *ITy =
+ dyn_cast<IntegerType>(ATy->getElementType())) {
+ if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
+ ITy->getBitWidth() == 32) &&
+ IsNullTerminatedString(C)) {
+ if (ITy->getBitWidth() == 8)
+ return SectionKind::getMergeable1ByteCString();
+ if (ITy->getBitWidth() == 16)
+ return SectionKind::getMergeable2ByteCString();
+
+ assert(ITy->getBitWidth() == 32 && "Unknown width");
+ return SectionKind::getMergeable4ByteCString();
+ }
+ }
+ }
+
+ // Otherwise, just drop it into a mergable constant section. If we have
+ // a section for this size, use it, otherwise use the arbitrary sized
+ // mergable section.
+ switch (TM.getTargetData()->getTypeAllocSize(C->getType())) {
+ case 4: return SectionKind::getMergeableConst4();
+ case 8: return SectionKind::getMergeableConst8();
+ case 16: return SectionKind::getMergeableConst16();
+ default: return SectionKind::getMergeableConst();
+ }
+
+ case Constant::LocalRelocation:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel.local section.
+ return SectionKind::getReadOnlyWithRelLocal();
+
+ case Constant::GlobalRelocations:
+ // In static relocation model, the linker will resolve all addresses, so
+ // the relocation entries will actually be constants by the time the app
+ // starts up. However, we can't put this into a mergable section, because
+ // the linker doesn't take relocations into consideration when it tries to
+ // merge entries in the section.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getReadOnly();
+
+ // Otherwise, the dynamic linker needs to fix it up, put it in the
+ // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel();
+ }
+ }
+
+ // Okay, this isn't a constant. If the initializer for the global is going
+ // to require a runtime relocation by the dynamic linker, put it into a more
+ // specific section to improve startup time of the app. This coalesces these
+ // globals together onto fewer pages, improving the locality of the dynamic
+ // linker.
+ if (ReloModel == Reloc::Static)
+ return SectionKind::getDataNoRel();
+
+ switch (C->getRelocationInfo()) {
+ default: assert(0 && "unknown relocation info kind");
+ case Constant::NoRelocation:
+ return SectionKind::getDataNoRel();
+ case Constant::LocalRelocation:
+ return SectionKind::getDataRelLocal();
+ case Constant::GlobalRelocations:
+ return SectionKind::getDataRel();
+ }
+}
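+
+// Illustrative classifications (hypothetical globals):
+//   @s = private unnamed_addr constant [6 x i8] c"hello\00"  -> Mergeable1ByteCString
+//   @z = internal global [256 x i8] zeroinitializer          -> BSSLocal
+//   @p = constant i32* @g (a reference to another global)    -> ReadOnly under the
+//        static relocation model, ReadOnlyWithRel otherwise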
+
+/// SectionForGlobal - This method computes the appropriate section to emit
+/// the specified global variable or function definition. This should not
+/// be passed external (or available externally) globals.
+const MCSection *TargetLoweringObjectFile::
+SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+ const TargetMachine &TM) const {
+ // Select section name.
+ if (GV->hasSection())
+ return getExplicitSectionGlobal(GV, Kind, Mang, TM);
+
+
+ // Use default section depending on the 'type' of global
+ return SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+
+// Lame default implementation. Calculate the section name for global.
+const MCSection *
+TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const{
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ if (Kind.isText())
+ return getTextSection();
+
+ if (Kind.isBSS() && BSSSection != 0)
+ return BSSSection;
+
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return getDataSection();
+}
+
+/// getSectionForConstant - Given a mergable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isReadOnly() && ReadOnlySection != 0)
+ return ReadOnlySection;
+
+ return DataSection;
+}
+
+/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// reference to the specified global variable from exception
+/// handling information.
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ return getExprForDwarfReference(Sym, Encoding, Streamer);
+}
+
+const MCExpr *TargetLoweringObjectFile::
+getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
+
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ // Do nothing special
+ return Res;
+ case dwarf::DW_EH_PE_pcrel: {
+ // Emit a label to the streamer for the current position. This gives us
+ // .-foo addressing.
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ }
+ }
+}
+
+unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+unsigned TargetLoweringObjectFile::getTTypeEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
new file mode 100644
index 000000000000..74a1f4e8da56
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -0,0 +1,315 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool LessPreciseFPMADOption;
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoFramePointerElimNonLeaf;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ FloatABI::ABIType FloatABIType;
+ bool NoImplicitFloat;
+ bool NoZerosInBSS;
+ bool JITExceptionHandling;
+ bool JITEmitDebugInfo;
+ bool JITEmitDebugInfoToDisk;
+ Reloc::Model RelocationModel;
+ CodeModel::Model CMModel;
+ bool GuaranteedTailCallOpt;
+ unsigned StackAlignmentOverride;
+ bool RealignStack;
+ bool DisableJumpTables;
+ bool StrongPHIElim;
+ bool HasDivModLibcall;
+ bool AsmVerbosityDefault(false);
+}
+
+static cl::opt<bool, true>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+static cl::opt<bool, true>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::location(NoFramePointerElimNonLeaf),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::location(LessPreciseFPMADOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+static cl::opt<llvm::FloatABI::ABIType, true>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::location(FloatABIType),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::location(JITExceptionHandling),
+ cl::init(false));
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool, true>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::location(JITEmitDebugInfo),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+static cl::opt<bool, true>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::location(JITEmitDebugInfoToDisk),
+ cl::init(false));
+
+static cl::opt<llvm::Reloc::Model, true>
+DefRelocationModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::location(RelocationModel),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+static cl::opt<llvm::CodeModel::Model, true>
+DefCodeModel("code-model",
+ cl::desc("Choose code model"),
+ cl::location(CMModel),
+ cl::init(CodeModel::Default),
+ cl::values(
+ clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::location(GuaranteedTailCallOpt),
+ cl::init(false));
+static cl::opt<unsigned, true>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::location(StackAlignmentOverride),
+ cl::init(0));
+static cl::opt<bool, true>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::location(RealignStack),
+ cl::init(true));
+static cl::opt<bool, true>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::location(DisableJumpTables),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::location(StrongPHIElim),
+ cl::init(false));
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+static cl::opt<bool>
+DataSections("fdata-sections",
+ cl::desc("Emit data into separate sections"),
+ cl::init(false));
+static cl::opt<bool>
+FunctionSections("ffunction-sections",
+ cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS)
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
+ MCRelaxAll(false),
+ MCNoExecStack(false),
+ MCSaveTempLabels(false),
+ MCUseLoc(true),
+ MCUseCFI(true) {
+ // Typically it will be subtargets that will adjust FloatABIType from Default
+ // to Soft or Hard.
+ if (UseSoftFloat)
+ FloatABIType = FloatABI::Soft;
+}
+
+TargetMachine::~TargetMachine() {
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() {
+ return RelocationModel;
+}
+
+/// setRelocationModel - Sets the code generation relocation model.
+void TargetMachine::setRelocationModel(Reloc::Model Model) {
+ RelocationModel = Model;
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() {
+ return CMModel;
+}
+
+/// setCodeModel - Sets the code model.
+void TargetMachine::setCodeModel(CodeModel::Model Model) {
+ CMModel = Model;
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+bool TargetMachine::getFunctionSections() {
+ return FunctionSections;
+}
+
+bool TargetMachine::getDataSections() {
+ return DataSections;
+}
+
+void TargetMachine::setFunctionSections(bool V) {
+ FunctionSections = V;
+}
+
+void TargetMachine::setDataSections(bool V) {
+ DataSections = V;
+}
+
+namespace llvm {
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+ }
+
+ /// LessPreciseFPMAD - This returns true when the -enable-fp-mad option is
+ /// specified on the command line. When this flag is off (the default), the
+ /// code generator is not allowed to generate mad (multiply-add) if the
+ /// result is "less precise" than doing those operations individually.
+ bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means isel
+ /// should lower Intrinsic::trap to a call to the specified function name
+ /// instead of an ISD::TRAP node.
+ StringRef getTrapFunctionName() {
+ return TrapFuncName;
+ }
+}
diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..90a8f8d8fdcc
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
@@ -0,0 +1,141 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *subregindexnames)
+ : InfoDesc(ID), SubRegIndexNames(subregindexnames),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+/// getMinimalPhysRegClass - Returns the register class of a physical
+/// register of the given type, picking the most specific (smallest) sub
+/// register class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Set the bits that represent the allocatable
+/// registers for the specified register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+const TargetRegisterClass *
+llvm::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) {
+ // First take care of the trivial cases
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // If B is a subclass of A, it will be handled in the loop below
+ if (B->hasSubClass(A))
+ return A;
+
+ const TargetRegisterClass *Best = 0;
+ for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
+ const TargetRegisterClass *X = *I; ++I) {
+ if (X == B)
+ return B; // B is a subclass of A
+
+ // X must be a common subclass of A and B
+ if (!B->hasSubClass(X))
+ continue;
+
+ // A superclass is definitely better.
+ if (!Best || Best->hasSuperClass(X)) {
+ Best = X;
+ continue;
+ }
+
+ // A subclass is definitely worse
+ if (Best->hasSubClass(X))
+ continue;
+
+ // Best and X have no super/sub class relation - pick the larger class, or
+ // the smaller spill size.
+ int nb = std::distance(Best->begin(), Best->end());
+ int ni = std::distance(X->begin(), X->end());
+ if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
+ Best = X;
+ }
+ return Best;
+}
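+
+// Illustrative example (X86 names, assuming the usual generated register
+// classes): getCommonSubClass(&X86::GR32RegClass, &X86::GR32_ABCDRegClass)
+// returns GR32_ABCD, since it is a subclass of GR32 and is therefore the
+// largest class whose registers belong to both.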
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
new file mode 100644
index 000000000000..59ffdea00ea6
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -0,0 +1,33 @@
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtargetInfo Class
+//
+TargetSubtargetInfo::TargetSubtargetInfo() {}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
+
+bool TargetSubtargetInfo::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
new file mode 100644
index 000000000000..ec73087a3305
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -0,0 +1,165 @@
+//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "X86.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86AsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ bool tentativeIsValid;
+ AsmToken tentativeToken;
+
+ const AsmToken &lexTentative() {
+ tentativeToken = getLexer()->Lex();
+ tentativeIsValid = true;
+ return tentativeToken;
+ }
+
+ const AsmToken &lexDefinite() {
+ if (tentativeIsValid) {
+ tentativeIsValid = false;
+ return tentativeToken;
+ }
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenATT();
+ AsmToken LexTokenIntel();
+protected:
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenATT();
+ case 1:
+ return LexTokenIntel();
+ }
+ }
+public:
+ X86AsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
+ }
+};
+
+} // end anonymous namespace
+
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
+
+AsmToken X86AsmLexer::LexTokenATT() {
+ AsmToken lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+
+ case AsmToken::Percent: {
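+    // A '%' may begin a register reference. Peek at the next token and, if it
+    // names a register (or a db[0-7] debug-register alias), fuse the two
+    // tokens into a single Register token.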
+ const AsmToken &nextToken = lexTentative();
+ if (nextToken.getKind() != AsmToken::Identifier)
+ return lexedToken;
+
+
+ if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(regID));
+ }
+
+      // Matching the register name failed. If this is "db[0-7]", match it as
+      // an alias for dr[0-7].
+ if (nextToken.getString().size() == 3 &&
+ nextToken.getString().startswith("db")) {
+ int RegNo = -1;
+ switch (nextToken.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != -1) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(RegNo));
+ }
+ }
+
+
+ return lexedToken;
+ }
+ }
+}
+
+AsmToken X86AsmLexer::LexTokenIntel() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch(lexedToken.getKind()) {
+ default:
+ return lexedToken;
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return lexedToken;
+ case AsmToken::Identifier: {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID)
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ return lexedToken;
+ }
+ }
+}
+
+extern "C" void LLVMInitializeX86AsmLexer() {
+ RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
+ RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
+}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
new file mode 100644
index 000000000000..d45dd352fbc4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -0,0 +1,1141 @@
+//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+struct X86Operand;
+
+class X86ATTAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+private:
+ MCAsmParser &getParser() const { return Parser; }
+
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ X86Operand *ParseOperand();
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
+  /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+ /// or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : TargetAsmParser(), STI(sti), Parser(parser) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+} // end anonymous namespace
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
+namespace {
+
+/// X86Operand - Instances of this class represent a parsed X86 machine
+/// instruction.
+struct X86Operand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ } Mem;
+ };
+
+ X86Operand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End) {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ virtual void print(raw_ostream &OS) const {}
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ void setTokenValue(StringRef Value) {
+ assert(Kind == Token && "Invalid access!");
+ Tok.Data = Value.data();
+ Tok.Length = Value.size();
+ }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNo;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getMemDisp() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Disp;
+ }
+ unsigned getMemSegReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.SegReg;
+ }
+ unsigned getMemBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.BaseReg;
+ }
+ unsigned getMemIndexReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.IndexReg;
+ }
+ unsigned getMemScale() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Scale;
+ }
+
+ bool isToken() const {return Kind == Token; }
+
+ bool isImm() const { return Kind == Immediate; }
+
+ bool isImmSExti16i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
+ bool isMem() const { return Kind == Memory; }
+
+ bool isAbsMem() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1;
+ }
+
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
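+  // Memory operands are flattened into the standard five MCOperands: base
+  // register, scale, index register, displacement, and segment register.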
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 5) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::CreateImm(getMemScale()));
+ Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ addExpr(Inst, getMemDisp());
+ Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ }
+
+ void addAbsMemOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
+ static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ X86Operand *Res = new X86Operand(Token, Loc, Loc);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ Res->Reg.RegNo = RegNo;
+ return Res;
+ }
+
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
+ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ Res->Imm.Val = Val;
+ return Res;
+ }
+
+ /// Create an absolute memory operand.
+ static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = 0;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = 0;
+ Res->Mem.IndexReg = 0;
+ Res->Mem.Scale = 1;
+ return Res;
+ }
+
+ /// Create a generalized memory operand.
+ static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
+ // We should never just have a displacement, that should be parsed as an
+ // absolute memory operand.
+ assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+
+ // The scale should always be one of {1,2,4,8}.
+ assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
+ "Invalid scale!");
+ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ Res->Mem.SegReg = SegReg;
+ Res->Mem.Disp = Disp;
+ Res->Mem.BaseReg = BaseReg;
+ Res->Mem.IndexReg = IndexReg;
+ Res->Mem.Scale = Scale;
+ return Res;
+ }
+};
+
+} // end anonymous namespace.
+
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
+
+ return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
+
+bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = 0;
+ const AsmToken &TokPercent = Parser.getTok();
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ StartLoc = TokPercent.getLoc();
+ Parser.Lex(); // Eat percent token.
+
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Tok.getLoc(), "invalid register name");
+
+ // FIXME: Validate register for the current architecture; we have to do
+ // validation later, so maybe there is no need for this here.
+ RegNo = MatchRegisterName(Tok.getString());
+
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !is64BitMode())
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
+ RegNo = X86::ST0;
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat 'st'
+
+ // Check to see if we have '(4)' after %st.
+ if (getLexer().isNot(AsmToken::LParen))
+ return false;
+ // Lex the paren.
+ getParser().Lex();
+
+ const AsmToken &IntTok = Parser.getTok();
+ if (IntTok.isNot(AsmToken::Integer))
+ return Error(IntTok.getLoc(), "expected stack index");
+ switch (IntTok.getIntVal()) {
+ case 0: RegNo = X86::ST0; break;
+ case 1: RegNo = X86::ST1; break;
+ case 2: RegNo = X86::ST2; break;
+ case 3: RegNo = X86::ST3; break;
+ case 4: RegNo = X86::ST4; break;
+ case 5: RegNo = X86::ST5; break;
+ case 6: RegNo = X86::ST6; break;
+ case 7: RegNo = X86::ST7; break;
+ default: return Error(IntTok.getLoc(), "invalid stack index");
+ }
+
+ if (getParser().Lex().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat ')'
+ return false;
+ }
+
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
+ if (RegNo == 0)
+ return Error(Tok.getLoc(), "invalid register name");
+
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat identifier token.
+ return false;
+}
+
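+/// ParseOperand - Parse a single operand in AT&T syntax: "%reg" yields a
+/// register (or, when followed by ':', starts a segment-prefixed memory
+/// reference), "$expr" yields an immediate, and anything else is parsed as a
+/// memory operand.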
+X86Operand *X86ATTAsmParser::ParseOperand() {
+ switch (getLexer().getKind()) {
+ default:
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
+ case AsmToken::Percent: {
+ // Read the register.
+ unsigned RegNo;
+ SMLoc Start, End;
+ if (ParseRegister(RegNo, Start, End)) return 0;
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
+ }
+ case AsmToken::Dollar: {
+ // $42 -> immediate.
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ Parser.Lex();
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
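+/// For example, "8(%ebx,%ecx,4)" yields Disp=8, BaseReg=EBX, IndexReg=ECX and
+/// Scale=4; any segment register has already been consumed by the caller and
+/// is passed in as SegReg.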
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the '(' and see what is
+ // after it.
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getLexer().isNot(AsmToken::LParen)) {
+ SMLoc ExprEnd;
+ if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ } else {
+    // Okay, we have a '('. We don't know if this is an expression or not, so
+    // we have to eat the '(' to see beyond it.
+ SMLoc LParenLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '('.
+
+ if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ SMLoc ExprEnd;
+
+      // It must be a parenthesized expression; parse it now.
+ if (getParser().ParseParenExpression(Disp, ExprEnd))
+ return 0;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // Unless we have a segment register, treat this as an immediate.
+ if (SegReg == 0)
+ return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
+ }
+
+ // Eat the '('.
+ Parser.Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+ }
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Following the comma we should have either an index register, or a scale
+    // value. We don't support the latter form, but we want to parse it
+ // correctly.
+ //
+    // Note that even though it would be completely consistent to support syntax
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().is(AsmToken::Percent)) {
+ SMLoc L;
+ if (ParseRegister(IndexReg, L, L)) return 0;
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(),
+ "expected comma in scale expression");
+ return 0;
+ }
+ Parser.Lex(); // Eat the comma.
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t ScaleVal;
+ if (getParser().ParseAbsoluteExpression(ScaleVal))
+ return 0;
+
+ // Validate the scale amount.
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
+ return 0;
+ }
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ } else if (getLexer().isNot(AsmToken::RParen)) {
+      // A scale amount without an index register is ignored.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
+ return 0;
+ }
+ SMLoc MemEnd = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ MemStart, MemEnd);
+}
+
+bool X86ATTAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef PatchedName = Name;
+
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ }
+ }
+ }
+
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
+
+ // Parse '*' modifier.
+ if (getLexer().is(AsmToken::Star)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("*", Loc));
+ Parser.Lex(); // Eat the star.
+ }
+
+ // Read the first operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (X86Operand *Op = ParseOperand())
+ Operands.push_back(Op);
+ else {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+  // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (is64BitMode() && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
+  // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
+
+ return false;
+}
+
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+ MCInst Inst;
+
+ // First, try a direct match.
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
+
+ // FIXME: Ideally, we would only attempt suffix matches for things which are
+ // valid prefixes, and we could just infer the right unambiguous
+ // type. However, that requires substantially more matcher support than the
+ // following hack.
+
+ // Change the operand to point to a temporary token.
+ StringRef Base = Op->getToken();
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
+
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
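+  //
+  // For example, an unsuffixed "add" is retried as "addb", "addw", "addl" and
+  // "addq"; if exactly one of those candidates matches, that form is emitted
+  // below.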
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
+ // Check for the various suffix matches.
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ MatchResultTy Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+
+ // Restore the old token.
+ Op->setTokenValue(Base);
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Out.EmitInstruction(Inst);
+ return false;
+ }
+
+ // Otherwise, the match failed, try to produce a decent error message.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (NumSuccessfulMatches > 1) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ return true;
+ }
+
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
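+/// For example, ".word 1, 2, 3" emits three 2-byte values (Size is 2 here).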
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+
+
+
+extern "C" void LLVMInitializeX86AsmLexer();
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmParser() {
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ LLVMInitializeX86AsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "X86GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 000000000000..4a0d2ec727a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,560 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+#include "X86GenEDInfo.inc"
+
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s) {
+ dbgs() << file << ":" << line << ": " << s;
+}
+
+#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
+
+namespace llvm {
+
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
+namespace X86 {
+ enum {
+ BX_SI = 500,
+ BX_DI = 501,
+ BP_SI = 502,
+ BP_DI = 503,
+ sib = 504,
+ sib64 = 505
+ };
+}
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+}
+
+static bool translateInstruction(MCInst &target,
+ InternalInstruction &source);
+
+X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
+ MCDisassembler(),
+ fMode(mode) {
+}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+}
+
+EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+ return instInfoX86;
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+/// MemoryObject.
+///
+/// @param arg - The generic callback parameter. In this case, this should
+/// be a pointer to a MemoryObject.
+/// @param byte - A pointer to the byte to be read.
+/// @param address - The address to be read.
+static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
+ MemoryObject* region = static_cast<MemoryObject*>(arg);
+ return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+/// raw_ostream.
+///
+/// @param arg - The generic callback parameter. This should be a pointer
+/// to a raw_ostream.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+ if (!arg)
+ return;
+
+ raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+ vStream << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ InternalInstruction internalInstr;
+
+ int ret = decodeInstruction(&internalInstr,
+ regionReader,
+ (void*)&region,
+ logger,
+ (void*)&vStream,
+ address,
+ fMode);
+
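+  // A nonzero return from decodeInstruction means the decode failed; record
+  // how many bytes were consumed so the caller can resynchronize, and return
+  // false to indicate that no instruction was produced.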
+ if (ret) {
+ size = internalInstr.readerCursor - address;
+ return false;
+ }
+ else {
+ size = internalInstr.length;
+ return !translateInstruction(instr, internalInstr);
+ }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
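+  // Build a table mapping the decoder's Reg enumeration to LLVM register
+  // numbers by expanding ALL_REGS with the ENTRY macro, then index it with
+  // the decoded register.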
+#define ENTRY(x) X86::x,
+ uint8_t llvmRegnums[] = {
+ ALL_REGS
+ 0
+ };
+#undef ENTRY
+
+ uint8_t llvmRegnum = llvmRegnums[reg];
+ mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ type = TYPE_MOFFS8;
+ break;
+ case 2:
+ type = TYPE_MOFFS16;
+ break;
+ case 4:
+ type = TYPE_MOFFS32;
+ break;
+ case 8:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field
+/// from.
+/// @return - false on success; true otherwise
+static bool translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ debug("A R/M register operand may not have a SIB byte");
+ return true;
+ }
+
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected EA base register");
+ return true;
+ case EA_BASE_NONE:
+ debug("EA_BASE_NONE for ModR/M base");
+ return true;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ debug("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ return true;
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ }
+
+ return false;
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) x86_registerNONE for now, but could be set
+ // if we have segment overrides
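+  //
+  // For example, the AT&T operand "8(%eax,%ebx,4)" becomes basereg=EAX,
+  // scaleamount=4, indexreg=EBX, displacement=8, segmentreg=0.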
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ debug("Unexpected sibBase");
+ return true;
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::CreateReg(0);
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ debug("Unexpected sibIndex");
+ return true;
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::CreateReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::CreateReg(0);
+ }
+
+ scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ if (insn.eaDisplacement == EA_DISP_NONE) {
+ debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+ return true;
+ }
+ if (insn.mode == MODE_64BIT)
+ baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ else
+ baseReg = MCOperand::CreateReg(0);
+
+ indexReg = MCOperand::CreateReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::CreateReg(0);
+ switch (insn.eaBase) {
+ default:
+ debug("Unexpected eaBase");
+ return true;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ debug("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ return true;
+ }
+ }
+
+ scaleAmount = MCOperand::CreateImm(1);
+ }
+
+ displacement = MCOperand::CreateImm(insn.displacement);
+
+ static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS,
+ X86::SS,
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+ };
+
+ segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ mcInst.addOperand(displacement);
+ mcInst.addOperand(segmentReg);
+ return false;
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+/// @return - 0 on success; nonzero otherwise
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.type) {
+ default:
+ debug("Unexpected type for a R/M operand");
+ return true;
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM:
+ case TYPE_MM32:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_XMM256:
+ case TYPE_DEBUGREG:
+ case TYPE_CONTROLREG:
+ return translateRMRegister(mcInst, insn);
+ case TYPE_M:
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M256:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M16INT:
+ case TYPE_M32INT:
+ case TYPE_M64INT:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ case TYPE_LEA:
+ return translateRMMemory(mcInst, insn);
+ }
+}
+
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+/// LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param stackPos - The stack position to translate.
+/// @return - 0 on success; nonzero otherwise.
+static bool translateFPRegister(MCInst &mcInst,
+ uint8_t stackPos) {
+ if (stackPos >= 8) {
+ debug("Invalid FP stack position");
+ return true;
+ }
+
+ mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+
+ return false;
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction
+/// to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.encoding) {
+ default:
+ debug("Unhandled operand encoding during translation");
+ return true;
+ case ENCODING_REG:
+ translateRegister(mcInst, insn.reg);
+ return false;
+ case ENCODING_RM:
+ return translateRM(mcInst, operand, insn);
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ debug("Translation of code offsets isn't supported.");
+ return true;
+ case ENCODING_IB:
+ case ENCODING_IW:
+ case ENCODING_ID:
+ case ENCODING_IO:
+ case ENCODING_Iv:
+ case ENCODING_Ia:
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
+ return false;
+ case ENCODING_RB:
+ case ENCODING_RW:
+ case ENCODING_RD:
+ case ENCODING_RO:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_I:
+ return translateFPRegister(mcInst, insn.opcodeModifier);
+ case ENCODING_Rv:
+ translateRegister(mcInst, insn.opcodeRegister);
+ return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
+ case ENCODING_DUP:
+ return translateOperand(mcInst,
+ insn.spec->operands[operand.type - TYPE_DUP0],
+ insn);
+ }
+}
+
+/// translateInstruction - Translates an internal instruction and all its
+/// operands to an MCInst.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn - The internal instruction.
+/// @return - false on success; true otherwise.
+static bool translateInstruction(MCInst &mcInst,
+ InternalInstruction &insn) {
+ if (!insn.spec) {
+ debug("Instruction has no specification");
+ return true;
+ }
+
+ mcInst.setOpcode(insn.instructionID);
+
+ int index;
+
+ insn.numImmediatesTranslated = 0;
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ if (insn.spec->operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static MCDisassembler *createX86_32Disassembler(const Target &T) {
+ return new X86Disassembler::X86_32Disassembler;
+}
+
+static MCDisassembler *createX86_64Disassembler(const Target &T) {
+ return new X86Disassembler::X86_64Disassembler;
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 000000000000..550cf9d40de2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,155 @@
+//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+//    OpcodeDecision (ibid.) in which to look up the opcode. Looking up the
+//    opcode yields a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
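For orientation, a minimal sketch (not part of the patch) of how a consumer might drive the C decoder that performs steps 1-6; the buffer struct and function names are invented for the example, while byteReader_t, decodeInstruction(), and MODE_64BIT come from X86DisassemblerDecoder.h below.

    /* Assumes #include "X86DisassemblerDecoder.h" and <stdint.h>.          */
    /* Hypothetical flat-buffer reader: 'arg' is the buffer and, because we
       pass startLoc == 0, addresses are simply offsets into it.  A nonzero
       return tells the decoder the byte is unavailable.                     */
    struct ByteBuffer { const uint8_t *bytes; uint64_t size; };

    static int bufferReader(void *arg, uint8_t *byte, uint64_t address) {
      struct ByteBuffer *buf = (struct ByteBuffer *)arg;
      if (address >= buf->size)
        return -1;
      *byte = buf->bytes[address];
      return 0;
    }

    static int decodeOne(struct ByteBuffer *buf) {
      struct InternalInstruction insn;
      /* Logger is NULL; dbgprintf() in the decoder checks for that. */
      if (decodeInstruction(&insn, bufferReader, buf, 0, 0, 0, MODE_64BIT))
        return -1;
      /* insn.length, insn.instructionID and insn.spec->name are now set;
         translating them into an MCInst (step 7) is the job of
         X86Disassembler.cpp.                                            */
      return 0;
    }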
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+/// All each platform class needs to do is subclass the constructor and
+/// provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+protected:
+ /// Constructor - Initializes the disassembler.
+ ///
+ /// @param mode - The X86 architecture mode to decode for.
+ X86GenericDisassembler(DisassemblerMode mode);
+public:
+ ~X86GenericDisassembler();
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ DisassemblerMode fMode;
+};
+
+/// X86_16Disassembler - 16-bit X86 disassembler.
+class X86_16Disassembler : public X86GenericDisassembler {
+public:
+ X86_16Disassembler() :
+ X86GenericDisassembler(MODE_16BIT) {
+ }
+};
+
+/// X86_32Disassembler - 32-bit X86 disassembler.
+class X86_32Disassembler : public X86GenericDisassembler {
+public:
+ X86_32Disassembler() :
+ X86GenericDisassembler(MODE_32BIT) {
+ }
+};
+
+/// X86_64Disassembler - 64-bit X86 disassembler.
+class X86_64Disassembler : public X86GenericDisassembler {
+public:
+ X86_64Disassembler() :
+ X86GenericDisassembler(MODE_64BIT) {
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 000000000000..de1610ba3d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1610 @@
+/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <stdarg.h> /* for va_*() */
+#include <stdio.h> /* for vsnprintf() */
+#include <stdlib.h> /* for exit() */
+#include <string.h> /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE 1
+#define FALSE 0
+
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
+#else
+#define debug(s) do { } while (0)
+#endif
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ * attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return - The InstructionContext to use when looking up an
+ * an instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint8_t attrMask) {
+ return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ * the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ * contextForAttrs.
+ * @param opcode - The last byte of the instruction's opcode, not counting
+ * ModR/M extensions and escapes.
+ * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode) {
+ const struct ContextDecision* decision = 0;
+
+ switch (type) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
+ }
+
+ return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+ modrm_type != MODRM_ONEENTRY;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ * an instruction.
+ *
+ * @param type - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode - See modRMRequired().
+ * @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec;
+
+ switch (type) {
+ default:
+ debug("Unknown opcode type");
+ return 0;
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
+ case MODRM_ONEENTRY:
+ return dec->instructionIDs[0];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return dec->instructionIDs[1];
+ else
+ return dec->instructionIDs[0];
+ case MODRM_FULL:
+ return dec->instructionIDs[modRM];
+ }
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ * that instruction.
+ *
+ * @param uid - The unique ID for the instruction. This should be returned by
+ * decode(); specifierForUID will not check bounds.
+ * @return - A pointer to the specification for that instruction.
+ */
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
+ return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ * byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn - The instruction with the reader function to use. The cursor
+ * for this instruction is advanced.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ * with the data read.
+ * @return - 0 if the read was successful; nonzero otherwise.
+ */
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+ int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+ if (!ret)
+ ++(insn->readerCursor);
+
+ return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return - See consumeByte().
+ */
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+ return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static void unconsumeByte(struct InternalInstruction* insn) {
+ insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type) \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
+ }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
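As a worked example of the endian conversion these readers perform (bytes chosen for illustration): if the four bytes at the cursor are 0x78 0x56 0x34 0x12, consumeUInt32() assembles them least-significant byte first:

    /* offset 0: combined = 0x00000078
     * offset 1: combined |= 0x56 <<  8   -> 0x00005678
     * offset 2: combined |= 0x34 << 16   -> 0x00345678
     * offset 3: combined |= 0x12 << 24   -> 0x12345678
     * readerCursor then advances by sizeof(uint32_t) == 4.
     */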
+/*
+ * dbgprintf - Uses the logging function provided by the user to log a single
+ * message, typically without a carriage-return.
+ *
+ * @param insn - The instruction containing the logging function.
+ * @param format - See printf().
+ * @param ... - See printf().
+ */
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
+ char buffer[256];
+ va_list ap;
+
+ if (!insn->dlog)
+ return;
+
+ va_start(ap, format);
+ (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ insn->dlog(insn->dlogArg, buffer);
+
+ return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ * location.
+ *
+ * @param insn - The instruction to be marked as having the prefix.
+ * @param prefix - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ * space of the instruction's reader).
+ */
+static void setPrefixPresent(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ insn->prefixPresent[prefix] = 1;
+ insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ * present at a given location.
+ *
+ * @param insn - The instruction to be queried.
+ * @param prefix - The prefix.
+ * @param location - The location to query.
+ * @return - Whether the prefix is at that location.
+ */
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ if (insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ BOOL isPrefix = TRUE;
+ BOOL prefixGroups[4] = { FALSE };
+ uint64_t prefixLocation;
+ uint8_t byte = 0;
+
+ BOOL hasAdSize = FALSE;
+ BOOL hasOpSize = FALSE;
+
+ dbgprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ if (consumeByte(insn, &byte))
+ return -1;
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dbgprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ debug("Unhandled override");
+ return -1;
+ }
+ if (prefixGroups[1])
+ dbgprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dbgprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = TRUE;
+ hasOpSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dbgprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = TRUE;
+ hasAdSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = FALSE;
+ break;
+ }
+
+ if (isPrefix)
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ insn->vexSize = 0;
+
+ if (byte == 0xc4) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ * extended or escape opcodes).
+ *
+ * @param insn - The instruction whose opcode is to be read.
+ * @return - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+ /* Determine the length of the primary opcode */
+
+ uint8_t current;
+
+ dbgprintf(insn, "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x0f) {
+ dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+ insn->twoByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x38) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
+ } else {
+ dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+ insn->opcodeType = TWOBYTE;
+ }
+ }
+
+ /*
+ * At this point we have consumed the full opcode.
+ * Anything we consume from here on must be unconsumed.
+ */
+
+ insn->opcode = current;
+
+ return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ * the ModR/M byte as appropriate for extended and escape opcodes,
+ * and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ * instruction.
+ * @param insn - The instruction whose ID is to be determined.
+ * @param attrMask - The attribute mask to search.
+ * @return - 0 if the ModR/M could be read when needed or was not
+ * needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+ struct InternalInstruction* insn,
+ uint8_t attrMask) {
+ BOOL hasModRMExtension;
+
+ uint8_t instructionClass;
+
+ instructionClass = contextForAttrs(attrMask);
+
+ hasModRMExtension = modRMRequired(insn->opcodeType,
+ instructionClass,
+ insn->opcode);
+
+ if (hasModRMExtension) {
+ if (readModRM(insn))
+ return -1;
+
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ 0);
+ }
+
+ return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+/*
+ * is64BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 64-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 64-bit
+ * @param equiv - The instruction that is 64-bit
+ */
+static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+ continue;
+ if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
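A few illustrative name pairs (the mnemonics are made up) and what these checks return:

    /* is16BitEquivalent("FOO32rr", "FOO16rr") -> TRUE   ('3'->'1', '2'->'6')
     * is64BitEquivalent("FOO32rr", "FOO64rr") -> TRUE   ('3'->'6', '2'->'4')
     * is64BitEquivalent("FOOL",    "FOOQ")    -> TRUE   ('L'->'Q')
     * is64BitEquivalent("FOO32rr", "BAR64rr") -> FALSE  (first characters differ)
     */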
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn) {
+ uint8_t attrMask;
+ uint16_t instructionID;
+
+ dbgprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (wFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_REXW;
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ }
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+ /*
+ * Although for SSE instructions it is usually necessary to treat REX.W+F2
+ * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
+ * an occasional instruction where F2 is incidental and REX.W is the more
+ * significant. If the decoded instruction is 32-bit and adding REX.W
+ * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithREXw,
+ insn,
+ attrMask & (~ATTR_XD))) {
+ /*
+ * Decoding with REX.w would yield nothing; give up and return original
+ * decode.
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithREXw = specifierForUID(instructionIDWithREXw);
+
+ if (is64BitEquivalent(spec->name, specWithREXw->name)) {
+ insn->instructionID = instructionIDWithREXw;
+ insn->spec = specWithREXw;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and those that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithOpsize = specifierForUID(instructionIDWithOpsize);
+
+ if (is16BitEquivalent(spec->name, specWithOpsize->name)) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specWithOpsize;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
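A brief, illustrative walk-through of how the routines above combine for the byte sequence f2 0f 58 c1 in 64-bit mode (the particular opcode is arbitrary):

    /* readPrefixes(): consumes 0xf2 (Group 1) and records its location, which
     *                 becomes necessaryPrefixLocation.
     * readOpcode():   consumes 0x0f 0x58 -> opcodeType = TWOBYTE, opcode = 0x58.
     * getID():        attrMask = ATTR_64BIT | ATTR_XD, since 0xf2 sits at the
     *                 necessary prefix location; getIDWithAttrMask() maps the
     *                 mask to a context via CONTEXTS_SYM, consults TWOBYTE_SYM
     *                 at opcode 0x58, and reads the ModR/M byte 0xc1 if the
     *                 table says it is required.
     */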
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase = 0;
+ SIBBase sibBaseBase = 0;
+ uint8_t index, base;
+
+ dbgprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = TRUE;
+
+ switch (insn->addressSize) {
+ case 2:
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ switch (index) {
+ case 0x4:
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ default:
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+ if (insn->sibIndex == SIB_INDEX_sib ||
+ insn->sibIndex == SIB_INDEX_sib64)
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ }
+
+ switch (scaleFromSIB(insn->sib)) {
+ case 0:
+ insn->sibScale = 1;
+ break;
+ case 1:
+ insn->sibScale = 2;
+ break;
+ case 2:
+ insn->sibScale = 4;
+ break;
+ case 3:
+ insn->sibScale = 8;
+ break;
+ }
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x3:
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn - The instruction whose displacement is to be read.
+ * @return - 0 if the displacement byte was successfully read; nonzero
+ * otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+
+ dbgprintf(insn, "readDisplacement()");
+
+ if (insn->consumedDisplacement)
+ return 0;
+
+ insn->consumedDisplacement = TRUE;
+
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ insn->consumedDisplacement = FALSE;
+ break;
+ case EA_DISP_8:
+ if (consumeInt8(insn, &d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consumeInt16(insn, &d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consumeInt32(insn, &d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ insn->consumedDisplacement = TRUE;
+ return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dbgprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
+ insn->consumedModRM = TRUE;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REX.b is set */
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5:
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REX.b is set */
+ insn->eaBase = EA_BASE_sib;
+ readSIB(insn);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if (insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MM64: \
+ case TYPE_MM32: \
+ case TYPE_MM: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_MM0 + index; \
+ case TYPE_SEGMENTREG: \
+ if (index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
+ *valid = 0; \
+ return prefix##_CR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, the
+ * field selects XMM0 rather than AX, which readModRM() would otherwise
+ * report.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ * @return - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
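For example (values chosen for illustration), if readModRM() reported a reg field of 2 against a 32-bit register base but the operand's type says it is an XMM register:

    /* readModRM():  insn->reg = MODRM_REG_EAX + 2                 (i.e. EDX)
     * fixupReg():   index = insn->reg - insn->regBase = 2
     * fixupRegValue(insn, TYPE_XMM128, 2, &valid)
     *               returns MODRM_REG_XMM0 + 2 with valid == 1    (i.e. XMM2)
     */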
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ * fixup*Value functions to use in correcting readModRM()'s interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op - The operand specifier.
+ * @return - 0 if fixup was successful; -1 if the register returned was
+ * invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+ const struct OperandSpecifier *op) {
+ uint8_t valid;
+
+ dbgprintf(insn, "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->reg - insn->regBase,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(insn,
+ (OperandType)op->type,
+ insn->eaBase - insn->eaRegBase,
+ &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an
+ * instruction. Handles AddRegFrm instructions.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeModifier(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readOpcodeModifier()");
+
+ if (insn->consumedOpcodeModifier)
+ return 0;
+
+ insn->consumedOpcodeModifier = TRUE;
+
+ switch (insn->spec->modifierType) {
+ default:
+ debug("Unknown modifier type.");
+ return -1;
+ case MODIFIER_NONE:
+ debug("No modifier but an operand expects one.");
+ return -1;
+ case MODIFIER_OPCODE:
+ insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+ return 0;
+ case MODIFIER_MODRM:
+ insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+ return 0;
+ }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ * instruction and interprets it appropriately given the operand width.
+ * Handles AddRegFrm instructions.
+ *
+ * @param insn - See readOpcodeModifier().
+ * @param size - The width (in bytes) of the register being specified.
+ * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ * RAX.
+ * @return - 0 on success; nonzero otherwise.
+ */
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dbgprintf(insn, "readOpcodeRegister()");
+
+ if (readOpcodeModifier(insn))
+ return -1;
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix &&
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 4:
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 8:
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ * desired operand size.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @param size - The width (in bytes) of the operand.
+ * @return - 0 if the immediate was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ dbgprintf(insn, "readImmediate()");
+
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
+
+ if (size == 0)
+ size = insn->immediateSize;
+ else
+ insn->immediateSize = size;
+
+ switch (size) {
+ case 1:
+ if (consumeByte(insn, &imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consumeUInt16(insn, &imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consumeUInt32(insn, &imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consumeUInt64(insn, &imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+/*
+ * readVVVV - Reads the VEX.vvvv field of the instruction, which encodes an
+ * additional register operand for some AVX instructions.
+ *
+ * @param insn - The instruction whose VEX.vvvv field is to be read.
+ * @return - 0 if the field was successfully read; nonzero otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int index;
+
+ dbgprintf(insn, "readOperands()");
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ switch (insn->spec->operands[index].encoding) {
+ case ENCODING_NONE:
+ break;
+ case ENCODING_REG:
+ case ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (readImmediate(insn, 1))
+ return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0))
+ return -1;
+ break;
+ case ENCODING_I:
+ if (readOpcodeModifier(insn))
+ return -1;
+ break;
+ case ENCODING_VVVV:
+ if (readVVVV(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ * user.
+ *
+ * @param insn - A pointer to the instruction to be populated. Must be
+ * pre-allocated.
+ * @param reader - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ * any internal state.
+ * @param logger - If non-NULL, the function to be used to write log messages
+ * and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ * any internal state.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ * decode the instruction in.
+ * @return - 0 if the instruction's memory could be read; nonzero if
+ * not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode) {
+ memset(insn, 0, sizeof(struct InternalInstruction));
+
+ insn->reader = reader;
+ insn->readerArg = readerArg;
+ insn->dlog = logger;
+ insn->dlogArg = loggerArg;
+ insn->startLocation = startLoc;
+ insn->readerCursor = startLoc;
+ insn->mode = mode;
+ insn->numImmediatesConsumed = 0;
+
+ if (readPrefixes(insn) ||
+ readOpcode(insn) ||
+ getID(insn) ||
+ insn->instructionID == 0 ||
+ readOperands(insn))
+ return -1;
+
+ insn->length = insn->readerCursor - insn->startLocation;
+
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
+
+ if (insn->length > 15)
+ dbgprintf(insn, "Instruction exceeds 15-byte limit");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 000000000000..a9c90f8f9bda
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,575 @@
+/*===- X86DisassemblerDecoder.h - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ const InstrUID *instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
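Two worked examples of these accessors (byte values chosen for illustration):

    /* REX byte 0x4d (0100 1101b):
     *   wFromREX(0x4d) == 1, rFromREX(0x4d) == 1,
     *   xFromREX(0x4d) == 0, bFromREX(0x4d) == 1
     * ModR/M byte 0x9c (10 011 100b):
     *   modFromModRM(0x9c) == 2, regFromModRM(0x9c) == 3, rmFromModRM(0x9c) == 4
     */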
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15)
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7)
+
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
+
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_YMM \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL \
+ ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address
+ * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
+ * distinguish between bases (EA_BASE_*) and registers that just happen to be
+ * referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+} EABase;
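To make the base-versus-register split concrete (an illustrative pair, assuming a 32-bit address size in readModRM()):

    /* mod == 0x0, rm == 0x3  ->  eaBase = EA_BASE_EBX   (memory operand [ebx])
     * mod == 0x3, rm == 0x3  ->  eaBase = EA_REG_EBX    (the register ebx itself)
     */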
+
+/*
+ * SIBIndex - All possible values of the SIB index field.
+ * Borrows entries from ALL_EA_BASES with the special case that
+ * sib is synonymous with NONE.
+ */
+typedef enum {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+#undef ENTRY
+ SIB_INDEX_max
+} SIBIndex;
+
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ * computations.
+ */
+typedef enum {
+ EA_DISP_NONE,
+ EA_DISP_8,
+ EA_DISP_16,
+ EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) MODRM_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ MODRM_REG_max
+} Reg;
+
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+ SEG_OVERRIDE_NONE,
+ SEG_OVERRIDE_CS,
+ SEG_OVERRIDE_SS,
+ SEG_OVERRIDE_DS,
+ SEG_OVERRIDE_ES,
+ SEG_OVERRIDE_FS,
+ SEG_OVERRIDE_GS,
+ SEG_OVERRIDE_max
+} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ * the decoder. Reads a single byte from the instruction's address space.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param byte - A pointer to a single byte in memory that should be set to
+ * contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ * be read from.
+ * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ * get debugging output from the decoder.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param log - A string that contains the message. Will be reused after
+ * the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
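A minimal sketch of a conforming logger (not part of the patch; the function name is invented and <stdio.h> is assumed):

    static void stderrLogger(void *arg, const char *log) {
      (void)arg;                       /* no consumer state needed here */
      fprintf(stderr, "x86 decoder: %s\n", log);
    }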
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+ /* Reader interface (C) */
+ byteReader_t reader;
+ /* Opaque value passed to the reader */
+ void* readerArg;
+ /* The address of the next byte to read via the reader */
+ uint64_t readerCursor;
+
+ /* Logger interface (C) */
+ dlog_t dlog;
+ /* Opaque value passed to the logger */
+ void* dlogArg;
+
+ /* General instruction information */
+
+ /* The mode to disassemble for (64-bit, protected, real) */
+ DisassemblerMode mode;
+ /* The start of the instruction, usable with the reader */
+ uint64_t startLocation;
+ /* The length of the instruction, in bytes */
+ size_t length;
+
+ /* Prefix state */
+
+ /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ uint8_t prefixPresent[0x100];
+ /* contains the location (for use with the reader) of the prefix byte */
+ uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
+ /* The value of the REX prefix, if present */
+ uint8_t rexPrefix;
+ /* The location where a mandatory prefix would have to be (i.e., right before
+ the opcode, or right before the REX prefix if one is present) */
+ uint64_t necessaryPrefixLocation;
+ /* The segment override type */
+ SegmentOverride segmentOverride;
+
+ /* Sizes of various critical pieces of data, in bytes */
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ /* opcode state */
+
+ /* The value of the two-byte escape prefix (usually 0x0f) */
+ uint8_t twoByteEscape;
+ /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+ uint8_t threeByteEscape;
+ /* The last byte of the opcode, not counting any ModR/M extension */
+ uint8_t opcode;
+ /* The ModR/M byte of the instruction, if it is an opcode extension */
+ uint8_t modRMExtension;
+
+ /* decode state */
+
+ /* The type of opcode, used for indexing into the array of decode tables */
+ OpcodeType opcodeType;
+ /* The instruction ID, extracted from the decode table */
+ uint16_t instructionID;
+ /* The specifier for the instruction, from the instruction info table */
+ const struct InstructionSpecifier *spec;
+
+ /* state for additional bytes, consumed during operand decode. Pattern:
+ consumed___ indicates that the byte was already consumed and does not
+ need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
+
+ /* The ModR/M byte, which contains most register operands and some portion of
+ all memory operands */
+ BOOL consumedModRM;
+ uint8_t modRM;
+
+ /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+ BOOL consumedSIB;
+ uint8_t sib;
+
+ /* The displacement, used for memory operands */
+ BOOL consumedDisplacement;
+ int32_t displacement;
+
+ /* Immediates. There can be two in some cases */
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ /* A register or immediate operand encoded into the opcode */
+ BOOL consumedOpcodeModifier;
+ uint8_t opcodeModifier;
+ Reg opcodeRegister;
+
+ /* Portions of the ModR/M byte */
+
+ /* These fields determine the allowable values for the ModR/M fields, which
+ depend on operand and address widths */
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ /* The Mod and R/M fields can encode a base for an effective address, or a
+ register. These are separated into two fields here */
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ /* The reg field always encodes a register */
+ Reg reg;
+
+ /* SIB state */
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ * a buffer provided by the consumer.
+ * @param insn - The buffer to store the instruction in. Allocated by the
+ * consumer.
+ * @param reader - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ * specific to the consumer. May be NULL.
+ * @param logger - The dlog_t to be used in printing status messages from the
+ * disassembler. May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ * specific to the logger. May be NULL.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
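+/*
+ * Example usage (illustrative sketch only; ByteSpan, bufferReader, and the
+ * calling code below are assumptions for the example, not part of this API):
+ *
+ *   struct ByteSpan { const uint8_t *bytes; uint64_t size; };
+ *
+ *   static int bufferReader(void *arg, uint8_t *byte, uint64_t address) {
+ *     struct ByteSpan *span = (struct ByteSpan *)arg;
+ *     if (address >= span->size)
+ *       return -1;                       // out of bounds; decode stops here
+ *     *byte = span->bytes[address];
+ *     return 0;
+ *   }
+ *
+ *   struct InternalInstruction insn;
+ *   struct ByteSpan span = { code, codeSize };
+ *   if (!decodeInstruction(&insn, bufferReader, &span, NULL, NULL,
+ *                          0, MODE_64BIT))
+ *     handleDecoded(insn.instructionID, insn.length);
+ */
+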
+/* x86DisassemblerDebug - C-accessible function for printing a message to
+ * debugs()
+ * @param file - The name of the file printing the debug message.
+ * @param line - The line number that printed the debug message.
+ * @param s - The message to print.
+ */
+
+void x86DisassemblerDebug(const char *file,
+ unsigned line,
+ const char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 000000000000..70315ed572b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,379 @@
+/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ * generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/Support/DataTypes.h"
+
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+
+#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR "x86DisassemblerContexts"
+#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly. Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS \
+ ENUM_ENTRY(ATTR_NONE, 0x00) \
+ ENUM_ENTRY(ATTR_64BIT, 0x01) \
+ ENUM_ENTRY(ATTR_XS, 0x02) \
+ ENUM_ENTRY(ATTR_XD, 0x04) \
+ ENUM_ENTRY(ATTR_REXW, 0x08) \
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_VEX, 0x20) \
+ ENUM_ENTRY(ATTR_VEXL, 0x40)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
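+
+/*
+ * ENUM_ENTRY is used as an X-macro: each consumer defines it, expands the
+ * ATTRIBUTE_BITS list, then undefines it again. The enum above expands to
+ *   enum attributeBits { ATTR_NONE = 0x00, ATTR_64BIT = 0x01, ..., ATTR_max };
+ * and a hypothetical name table (not defined anywhere in this header) could
+ * be generated from the very same list with
+ *   #define ENUM_ENTRY(n, v) #n,
+ *   static const char *attributeNames[] = { ATTRIBUTE_BITS };
+ *   #undef ENUM_ENTRY
+ */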
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode. Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/* Class name Rank Rationale for rank assignment */
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+
+
+#define ENUM_ENTRY(n, r, d) n,
+typedef enum {
+ INSTRUCTION_CONTEXTS
+ IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table. After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode. Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ * instruction is the same.
+ * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ * corresponds to one instruction; otherwise, it corresponds to
+ * a different instruction.
+ * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+ * to a different instruction.
+ */
+
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODRMTYPES
+ MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * instruction each possible value of the ModR/M byte corresponds to. Once
+ * this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+ uint8_t modrm_type;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ * given a particular opcode.
+ */
+struct OpcodeDecision {
+ struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ * a particular context (set of attributes). Since there are many possible
+ * contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ * applies given a specific set of attributes. Hence there are only IC_max
+ * entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+ struct OpcodeDecision opcodeDecisions[IC_max];
+};
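+
+/*
+ * Taken together, resolving an instruction is a walk down this hierarchy.
+ * The sketch below is illustrative only: the "instructionIDs" member is
+ * whatever INSTRUCTION_IDS expands to at the point of inclusion, and the
+ * indexing for MODRM_SPLITRM is simplified away.
+ *
+ *   InstructionContext ctx = CONTEXTS_SYM[attributes];
+ *   const struct ModRMDecision *dec =
+ *     &TWOBYTE_SYM.opcodeDecisions[ctx].modRMDecisions[opcode];
+ *   InstrUID uid = (dec->modrm_type == MODRM_ONEENTRY)
+ *                    ? dec->instructionIDs[0]
+ *                    : dec->instructionIDs[modRM];   // MODRM_FULL case
+ */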
+
+/*
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
+ "opcode byte") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type")
+
+#define ENUM_ENTRY(n, d) n,
+ typedef enum {
+ ENCODINGS
+ ENCODING_max
+ } OperandEncoding;
+#undef ENUM_ENTRY
+
+/*
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_LEA, "Effective address") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "32-byte (AVX)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEEE 754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
+ "floating-point instructions") \
+ ENUM_ENTRY(TYPE_M32INT, "4-byte") \
+ ENUM_ENTRY(TYPE_M64INT, "8-byte") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM, "MMX register operand") \
+ ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,
+typedef enum {
+ TYPES
+ TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/*
+ * OperandSpecifier - The specification for how to extract and interpret one
+ * operand.
+ */
+struct OperandSpecifier {
+ OperandEncoding encoding;
+ OperandType type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found. Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES \
+ ENUM_ENTRY(MODIFIER_NONE) \
+ ENUM_ENTRY(MODIFIER_OPCODE) \
+ ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODIFIER_TYPES
+ MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+ ModifierType modifierType;
+ uint8_t modifierBase;
+ struct OperandSpecifier operands[X86_MAX_OPERANDS];
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_SPECIFIER_FIELDS
+};
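+
+/*
+ * A purely hypothetical table entry, for illustration only (the fields added
+ * by INSTRUCTION_SPECIFIER_FIELDS are omitted): a 32-bit register-register
+ * instruction with one operand in the ModR/M reg field and the other in the
+ * r/m field would be described roughly as
+ *
+ *   { MODIFIER_NONE, 0,
+ *     { { ENCODING_REG,  TYPE_R32  },
+ *       { ENCODING_RM,   TYPE_RM32 },
+ *       { ENCODING_NONE, TYPE_NONE },
+ *       { ENCODING_NONE, TYPE_NONE },
+ *       { ENCODING_NONE, TYPE_NONE } } }
+ */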
+
+/*
+ * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..033973eeeff9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmPrinter
+ X86ATTInstPrinter.cpp
+ X86IntelInstPrinter.cpp
+ X86InstComments.cpp
+ )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
new file mode 100644
index 000000000000..c82aa330a20c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..c37d8797b39c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,140 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <map>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {
+}
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << '%' << getRegisterName(RegNo);
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls). These
+/// print slightly differently than normal immediates. For example, a $ is not
+/// emitted.
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ // Print this as a signed 32-bit value.
+ O << (int)Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << '%' << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << '$' << Op.getImm();
+
+ if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << '$' << *Op.getExpr();
+ }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op, O);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
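+
+// For reference (illustrative only, not exhaustive), the AT&T forms the code
+// above produces look like:
+//   %gs:8(%rax,%rcx,4)   segment override, displacement, base, index, scale
+//   (%rbx)               base register only; a zero displacement is elided
+//   42                   bare displacement when neither base nor index is set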
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..5426e5cf38d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,86 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+ X86ATTInstPrinter(const MCAsmInfo &MAI);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..4e28dfe7fa81
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -0,0 +1,260 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../Utils/X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLBWMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLWDMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLDQMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
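+
+// Example of the comment this emits when verbose assembly is enabled
+// (illustrative only): for
+//   pshufd $27, %xmm1, %xmm0
+// the immediate 27 (0b00011011) selects elements 3,2,1,0, so the printed
+// comment reads
+//   xmm0 = xmm1[3,2,1,0]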
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
new file mode 100644
index 000000000000..6b86db4f9e5c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..506e26cbf7cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,143 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+ printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << Op.getImm();
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+ for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+ O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ PrintRegName(O, getRegisterName(Op.getReg()));
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, O);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+2, O);
+ NeedPlus = true;
+ }
+
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ O << *DispSpec.getExpr();
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+
+ O << ']';
+}
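+
+// For reference (illustrative only), together with the size directives added
+// by the print*mem wrappers in X86IntelInstPrinter.h, the Intel forms look
+// like:
+//   DWORD PTR [EAX + 4*ECX + 16]   base, scaled index, positive displacement
+//   QWORD PTR [RBP - 8]            negative displacement printed as " - 8"
+//   DWORD PTR FS:[16]              bare displacement with a segment override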
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..e84a1940017d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,96 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+ X86IntelInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "OPAQUE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+
+ void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ca88f8ffd08c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMX86Desc
+ X86MCTargetDesc.cpp
+ X86MCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..b19774ee379e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
new file mode 100644
index 000000000000..27031005bd09
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -0,0 +1,138 @@
+//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1
+};
+
+static cl::opt<AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+static const char *const x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{fpcr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
+ CommentString = "##";
+ PCSymbol = ".";
+
+ SupportsDebugInformation = true;
+ DwarfUsesInlineInfoSection = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
+}
+
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ if (T.getArch() == Triple::x86_64)
+ PointerSize = 8;
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
+ // .words.
+ if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ Data64bitsDirective = 0;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+const MCSection *X86ELFMCAsmInfo::
+getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
+
+X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
new file mode 100644
index 000000000000..2cd4c8eb30ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -0,0 +1,46 @@
+//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+ class Triple;
+
+ struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+ };
+
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
+ struct X86ELFMCAsmInfo : public MCAsmInfo {
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
+ };
+
+ struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ };
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 000000000000..b77f37b03f19
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,185 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return "+64bit-mode";
+ return "-64bit-mode";
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know that cpuid clobbers ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
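+
+// Worked example (illustrative): for a leaf-1 EAX of 0x000306A9 the base
+// family is 6 and the base model is 0xA; because the family is 6, the
+// extended model bits (0x3) are folded in, giving Family = 6, Model = 0x3A.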
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+}
+
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoDarwin(TheTriple);
+ }
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
+}
+
+extern "C" void LLVMInitializeX86MCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 000000000000..89ea22b31be2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,60 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+ /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 000000000000..13680c592e01
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,506 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
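+///
+/// For illustration: if AvailableDomains == 0b0110, the value is available in
+/// domains 1 and 2, and getFirstDomain() returns 1, the index of the lowest
+/// set bit.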
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+  // Bitmask of available domains. For an open DomainValue, it is the set of
+  // domains it may still collapse to. For a collapsed DomainValue it is the
+  // set of domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single domain available.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return CountTrailingZeros_32(AvailableDomains);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = AvailableDomains = Dist = 0;
+ Instrs.clear();
+ }
+};
+}
+
+static const unsigned NumRegs = 16;
+
+namespace {
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineBasicBlock *MBB;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
+public:
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // DomainValue allocation.
+ DomainValue *Alloc(int domain = -1);
+ void Recycle(DomainValue*);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
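+/// For example, RegIndex(X86::XMM5) returns 5; anything outside XMM0-XMM15
+/// maps to -1.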
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ reg -= X86::XMM0;
+ return reg < NumRegs ? (int) reg : -1;
+}
+
+DomainValue *SSEDomainFixPass::Alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ dv->Dist = Distance;
+ if (domain >= 0)
+ dv->addDomain(domain);
+ return dv;
+}
+
+void SSEDomainFixPass::Recycle(DomainValue *dv) {
+ assert(dv && "Cannot recycle NULL");
+ dv->clear();
+ Avail.push_back(dv);
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs) {
+ LiveRegs = new DomainValue*[NumRegs];
+ std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
+ }
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ Collapse(dv, domain);
+ else {
+      // This is an incompatible open DomainValue. Collapse it to whatever
+      // and force the new value into domain. This costs a domain crossing.
+ Collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ SetLiveReg(rx, Alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv)
+ SetLiveReg(rx, Alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
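+/// For illustration: if A->AvailableDomains == 0b011 and
+/// B->AvailableDomains == 0b110, the merge succeeds and restricts A to 0b010;
+/// with no domain in common, Merge returns false and leaves A untouched.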
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx]) {
+ SetLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->isCollapsed()) {
+        // We are already collapsed, but the predecessor is not. Force it.
+ unsigned domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(domain))
+ Collapse(pdv, domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Force(rx, pdv->getFirstDomain());
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->getCommonDomains(available)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv) {
+ dv = doms.pop_back_val();
+ continue;
+ }
+
+ DomainValue *latest = doms.pop_back_val();
+ if (Merge(dv, latest)) continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i] == latest)
+ Kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Alloc();
+ dv->Dist = Distance;
+ dv->AvailableDomains = available;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+      if (domp.first) {
+        if (domp.second)
+          visitSoftInstr(mi, domp.second);
+        else
+          visitHardInstr(mi, domp.first);
+      } else if (LiveRegs)
+        visitGenericInstr(mi);
+ }
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
+ }
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ delete[] i->second;
+ LiveOuts.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 000000000000..08d4d84f8a8a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+ RegisterTarget<Triple::x86, /*HasJIT=*/true>
+ X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+ Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..3ad5f991c865
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+ X86ShuffleDecode.cpp
+ )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile
new file mode 100644
index 000000000000..1df6f0f561d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 000000000000..cd06060748b7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,190 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
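+/// For example (illustrative): DecodeINSERTPSMask(0x50, Mask) produces
+/// <0, 5, 2, 3>, i.e. source element 1 (CountS) is inserted into destination
+/// element 1 (CountD) and no lanes are zeroed (ZMask == 0).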
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+  // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
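+/// For example (illustrative): DecodePSHUFMask(4, 0x1B, Mask) produces the
+/// element-reversing mask <3, 2, 1, 0>.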
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+}
+
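+/// For example (illustrative): DecodePUNPCKHMask(4, Mask) produces the
+/// punpckhdq-style mask <2, 6, 3, 7>.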
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
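+/// For example (illustrative): DecodeSHUFPSMask(4, 0x1B, Mask) produces
+/// <3, 2, 5, 4>; the low half of the result reads from the destination and
+/// the high half from the source.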
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
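+/// For example (illustrative), a 4-element vector yields the unpcklps mask
+/// <0, 4, 1, 5>; wider vectors are decoded one 128-bit section at a time.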
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0) NumSections = 1;  // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
+ }
+}
+
+} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 000000000000..b18f67033096
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,87 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
new file mode 100644
index 000000000000..ec52dfb3e7d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -0,0 +1,91 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class JITCodeEmitter;
+class MachineCodeEmitter;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class Target;
+class TargetAsmBackend;
+class X86TargetMachine;
+class formatted_raw_ostream;
+class raw_ostream;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
+/// createX86JITCodeEmitterPass - Return a pass that emits the collected X86
+/// code to the specified JITCodeEmitter object.
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
+
+/// createEmitX86CodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
+/// which determines whether the frame pointer register should be
+/// reserved in case dynamic stack alignment is later required.
+///
+FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
new file mode 100644
index 000000000000..4ccb43fe18cc
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -0,0 +1,234 @@
+//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+ "Enable conditional move instructions">;
+
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ // SSE codegen depends on cmovs, and all
+ // SSE1+ processors support them.
+ [FeatureMMX, FeatureCMOV]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41, FeaturePOPCNT]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions",
+ [FeatureCMOV]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
+
+def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
+ "Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
+def FeatureFMA3  : SubtargetFeature<"fma3", "HasFMA3", "true",
+                                    "Enable three-operand fused multiply-add">;
+def FeatureFMA4  : SubtargetFeature<"fma4", "HasFMA4", "true",
+                                    "Enable four-operand fused multiply-add">;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
+ "Enable AES instructions">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", [FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+// "Arrandale" along with corei3 and corei5
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+// Westmere is a similar machine to nehalem with some additional features.
+// Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
+def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
+ FeatureAES, FeatureCLMUL]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Parser
+//===----------------------------------------------------------------------===//
+
+// Currently the X86 assembly parser only supports ATT syntax.
+def ATTAsmParser : AsmParser {
+ string AsmParserClassName = "ATTAsmParser";
+ int Variant = 0;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = "#";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "%";
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel} flag.
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTInstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelInstPrinter";
+ int Variant = 1;
+ bit isMCAsmWriter = 1;
+}
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyParsers = [ATTAsmParser];
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
new file mode 100644
index 000000000000..9b556a55efd9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
@@ -0,0 +1,453 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+         cl::desc("Disable relaxation of arithmetic instructions for X86"));
+
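+// Map a fixup kind to the log2 of its size in bytes; for example, FK_Data_4
+// and the rip-relative 4-byte relocations map to 2 (1 << 2 == 4 bytes).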
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
+ case FK_Data_4: return 2;
+ case FK_PCRel_8:
+ case FK_Data_8: return 3;
+ }
+}
+
+namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class X86AsmBackend : public TargetAsmBackend {
+public:
+ X86AsmBackend(const Target &T)
+ : TargetAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+ for (unsigned i = 0; i != Size; ++i)
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+};
+} // end anonymous namespace
+
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
+ }
+}
+
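+// Map an opcode to its relaxed form with a 4-byte immediate or displacement,
+// e.g. JE_1 -> JE_4 and ADD32ri8 -> ADD32ri; opcodes with no relaxed form are
+// returned unchanged.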
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
+ if (MCDisableArithRelaxation)
+ return false;
+
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
+}
+
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. An optimal nop sequence covers the first 15 bytes; any remaining
+/// bytes are padded with single-byte nops. Returns true on success.
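+///
+/// For illustration: Count == 12 is emitted as two 0x66 prefixes followed by
+/// the 10-byte nop, and Count == 20 as a 15-byte optimal sequence followed by
+/// five single-byte 0x90 nops.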
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[10][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // Write an optimal sequence for the first 15 bytes.
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+
+ // Finish with single byte nops.
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
+ OW->Write8(0x90);
+
+ return true;
+}
+
+/* *** */
+
+namespace {
+class ELFX86AsmBackend : public X86AsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+ : X86AsmBackend(T), OSType(_OSType) {
+ HasReliableSymbolDifference = true;
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
+ }
+};
+
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ }
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+};
+
+class DarwinX86AsmBackend : public X86AsmBackend {
+public:
+ DarwinX86AsmBackend(const Target &T)
+ : X86AsmBackend(T) { }
+};
+
+class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_32AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
+ }
+};
+
+class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+public:
+ DarwinX86_64AsmBackend(const Target &T)
+ : DarwinX86AsmBackend(T) {
+ HasReliableSymbolDifference = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ // Temporary labels in the string literals sections require symbols. The
+ // issue is that the x86_64 relocation format does not allow symbol +
+ // offset, and so the linker does not have enough information to resolve the
+ // access to the appropriate atom unless an external relocation is used. For
+ // non-cstring sections, we expect the compiler to use a non-temporary label
+ // for anything that could have an addend pointing outside the symbol.
+ //
+ // See <rdar://problem/4765733>.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
+ }
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    // Fixed-size data sections are uniqued; they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
+};
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
+}
+
+TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ const std::string &TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
new file mode 100644
index 000000000000..99b4479a9fc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -0,0 +1,728 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to X86 machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+/// runOnMachineFunction - Emit the function body.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
+}
+
+/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// jump tables, constant pools, global addresses and external symbols, all of
+/// which print to a label with various suffixes for relocation types etc.
+void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown symbol type!");
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ printOffset(MO.getOffset(), O);
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+
+ MCSymbol *GVSym;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ else
+ GVSym = Mang->getSymbol(GV);
+
+ // Handle dllimport linkage.
+ if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
+ GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (GVSym->getName()[0] != '$')
+ O << *GVSym;
+ else
+ O << '(' << *GVSym << ')';
+ printOffset(MO.getOffset(), O);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const MCSymbol *SymToPrint;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
+ SmallString<128> TempNameStr;
+ TempNameStr += StringRef(MO.getSymbolName());
+ TempNameStr += StringRef("$stub");
+
+ MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
+ true);
+ }
+ SymToPrint = StubSym.getPointer();
+ } else {
+ SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
+ }
+
+ // If the name begins with a dollar-sign, enclose it in parens. We do this
+ // to avoid having it look like an integer immediate to the assembler.
+ if (SymToPrint->getName()[0] != '$')
+ O << *SymToPrint;
+ else
+      O << '(' << *SymToPrint << ')';
+ break;
+ }
+ }
+
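+  // Append any relocation-specifier suffix implied by the operand's target
+  // flag; e.g. a MO_GOTPCREL reference to a symbol foo is printed below as
+  // "foo@GOTPCREL".  (foo is just an illustrative symbol name.)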
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ break;
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ // These affect the name of the symbol, not any suffix.
+ break;
+ case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ O << '-' << *MF->getPICBaseSymbol();
+ break;
+ case X86II::MO_TLSGD: O << "@TLSGD"; break;
+ case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
+ case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+ case X86II::MO_TPOFF: O << "@TPOFF"; break;
+ case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
+ case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOT: O << "@GOT"; break;
+ case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
+ case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
+ break;
+ }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ printSymbolOperand(MO, O);
+ return;
+ }
+}
+
+
+void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ O << '$' << MO.getImm();
+ return;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol: {
+ O << '$';
+ printSymbolOperand(MO, O);
+ break;
+ }
+ }
+}
+
+void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
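+// Prints the AT&T memory-operand form "disp(base,index,scale)"; for example
+// (illustrative operands only) a reference with displacement 16, base %rax,
+// index %rcx and scale 4 comes out as "16(%rax,%rcx,4)".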
+void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
+ // HasParenPart - True if we will print out the () part of the mem ref.
+ bool HasParenPart = IndexReg.getReg() || HasBaseReg;
+
+ if (DispSpec.isImm()) {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || !HasParenPart)
+ O << DispVal;
+ } else {
+ assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI() || DispSpec.isSymbol());
+ printSymbolOperand(MI->getOperand(Op+3), O);
+ }
+
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
+ if (HasParenPart) {
+ assert(IndexReg.getReg() != X86::ESP &&
+ "X86 doesn't allow scaling by ESP");
+
+ O << '(';
+ if (HasBaseReg)
+ printOperand(MI, Op, O, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2, O, Modifier);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O, const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, O, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, O, Modifier);
+}
+
+void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
+ raw_ostream &O) {
+ O << *MF->getPICBaseSymbol() << '\n';
+ O << *MF->getPICBaseSymbol() << ':';
+}
+
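+// Maps an inline-asm register width modifier onto the matching sub- or
+// super-register before printing; e.g. applying 'k' to %rax prints "%eax"
+// and 'h' to %eax prints "%ah".  (Illustrative registers.)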
+bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // This is an address. Currently only 'i' and 'r' are expected.
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
+ printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+ return false;
+ }
+ if (MO.isReg()) {
+ O << '(';
+ printOperand(MI, OpNo, O);
+ O << ')';
+ return false;
+ }
+ return true;
+
+ case 'c': // Don't print "$" before a global var name or constant.
+ if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
+ printSymbolOperand(MO, O);
+ else
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'A': // Print '*' before a register (it must be a register)
+ if (MO.isReg()) {
+ O << '*';
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ return true;
+
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0], O);
+ printOperand(MI, OpNo, O);
+ return false;
+
+ case 'P': // This is the operand of a call, treat specially.
+ print_pcrel_imm(MI, OpNo, O);
+ return false;
+
+ case 'n': // Negate the immediate or print a '-' before the operand.
+ // Note: this is a temporary solution. It should be handled target
+ // independently as part of the 'MC' work.
+ if (MO.isImm()) {
+ O << -MO.getImm();
+ return false;
+ }
+ O << '-';
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+    case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, O, "no-rip");
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo, O);
+ return false;
+}
+
+void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho())
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+
+void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetEnvMacho()) {
+    // All Darwin targets use Mach-O.
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs;
+
+ Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__jump_table",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+ MCSA_IndirectSymbol);
+        // hlt; hlt; hlt; hlt; hlt   (hlt = 0xf4 = -12).
+ const char HltInsts[] = { -12, -12, -12, -12, -12 };
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Output stubs for external and common global variables.
+ Stubs = MMIMacho.GetGVStubList();
+ if (!Stubs.empty()) {
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
+ MCSA_IndirectSymbol);
+ // .long 0
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$non_lazy_ptr:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ 4/*size*/, 0/*addrspace*/);
+ }
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ }
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+ // Emit type information for external functions
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
+ }
+ }
+ }
+
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const TargetData *TD = TM.getTargetData();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() {
+ RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+ RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
new file mode 100644
index 000000000000..3a50435d38ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -0,0 +1,87 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual void EmitEndOfAsmFile(Module &M);
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+ void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O,
+ const char *Modifier=NULL);
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 000000000000..4326814a7a96
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,20 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+using namespace llvm;
+
+
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 000000000000..98ab2a66a17f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,46 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (Windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "X86MachineFunctionInfo.h"
+
+namespace llvm {
+ class X86MachineFunctionInfo;
+ class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+ DenseSet<MCSymbol const *> Externals;
+public:
+ X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
+ virtual ~X86COFFMachineModuleInfo();
+
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
+ }
+
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 000000000000..77b99056ae00
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,401 @@
+//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+  // requires the values to be in AL and AH; however, this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+  // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0.
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
+ CCIfType<[x86mmx],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasXMMInt()",
+ CCPromoteToType<v2i64>>>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>
+]>;
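+
+// For illustration only: under CC_X86_64_C a call such as f(int, double, long)
+// takes EDI for the int, XMM0 for the double and RSI for the long, matching
+// the SysV AMD64 ABI register order.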
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // 128 bit vectors are passed by pointer
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+  // Do not pass the sret argument in RCX; the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>
+]>;
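+
+// Note (illustrative): Win64 ties each of the first four parameter positions
+// to both a GPR and an XMM register, so g(int, double) passes the int in ECX
+// and the double in XMM1, leaving EDX and XMM0 unused for that call.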
+
+def CC_X86_64_GHC : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64],
+ CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+ // Pass in STG registers: F1, F2, F3, F4, D1, D2
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasXMM()",
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+ // The first 3 __m64 vector arguments are passed in mmx registers if the
+ // call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[x86mmx],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
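+
+// Illustrative example: with fastcall, f(int a, int b, int c) receives a in
+// ECX, b in EDX, and c on the stack via CC_X86_32_Common.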
+
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
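+
+// Illustrative example: for a thiscall method invocation obj->m(5), the
+// "this" pointer lands in ECX and the integer argument 5 goes on the stack.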
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasXMMInt()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_GHC : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1
+ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 000000000000..4b11db7c0331
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,1005 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ template<class CodeEmitter>
+ class Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t Adj = 0, bool IsPCRel = true);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const;
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+} // end anonymous namespace.
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified templated MachineCodeEmitter object.
+FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+
+ II = TM.getInstrInfo();
+ TD = TM.getTargetData();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ do {
+ DEBUG(dbgs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MCInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ ++NumEmitted; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
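+///
+/// The low four bits of the returned value follow the hardware REX layout:
+/// W in bit 3, R in bit 2, X in bit 1, B in bit 0.  As an illustration, a
+/// 64-bit instruction whose ModRM r/m operand is R8 needs W and B, i.e. the
+/// bits 0x9 (prefix byte 0x49 once the fixed 0x4 high nibble is added).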
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and where it is to so we can
+ // deal with it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = Disp;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV),
+ RelocCST, false)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), RelocCST, false);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+
+ // X86 never needs stubs because instruction selection will always pick
+ // an instruction sequence that is large enough to hold any address
+ // to a symbol.
+ // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
+ bool NeedStub = false;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST,
+ 0, NeedStub));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
+ return X86RegisterInfo::getX86RegNum(RegNo);
+}
+
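+// The ModR/M byte packs three fields: mod in bits 7-6, reg/opcode in bits
+// 5-3, and r/m in bits 2-0.  For example (purely illustrative values),
+// ModRMByte(3, 2, 1) yields 0b11'010'001 == 0xD1.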
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
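+// Little-endian example (illustrative): emitConstant(0x12345678, 4) emits the
+// byte sequence 0x78, 0x56, 0x34, 0x12.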
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
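+///
+/// Equivalently, this holds exactly for values in [-128, 127]; e.g. a
+/// displacement of -100 can use the short disp8 form, while 300 cannot.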
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
+ const TargetMachine &TM) {
+ // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
+ return false;
+
+ // Return true if this is a reference to a stub containing the address of the
+ // global, not the global itself.
+ return isGlobalStubReference(GVOp.getTargetFlags());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal,
+ intptr_t Adj /* = 0 */,
+ bool IsPCRel /* = true */) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ unsigned RelocType = Is64BitMode ?
+ (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (RelocOp->isGlobal()) {
+    // In the 64-bit static small code model we could potentially emit an
+    // absolute reference, but it's probably not beneficial. If the MCE
+    // supports using RIP directly, do it; otherwise fall back to absolute
+    // (this is determined by IsPCRel).
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+ emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+ Adj, Indirect);
+ } else if (RelocOp->isSymbol()) {
+ emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
+ } else if (RelocOp->isCPI()) {
+ emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+ RelocOp->getOffset(), Adj);
+ } else {
+ assert(RelocOp->isJTI() && "Unexpected machine operand!");
+ emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op,unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isSymbol()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP ||
+ (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+  // Indicate whether the displacement will use a pcrel or absolute reference
+  // by default. MCEs able to resolve addresses on-the-fly use pcrel by
+  // default, while others, unless explicitly asked to use RIP, use absolute
+  // references.
+  bool IsPCRel = MCE.earlyResolveAddresses();
+
+ // Is a SIB byte needed?
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+ unsigned BaseRegNo = -1U;
+ if (BaseReg != 0 && BaseReg != X86::RIP)
+ BaseRegNo = getX86RegNum(BaseReg);
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!Is64BitMode || BaseReg != 0)) {
+ if (BaseReg == 0 || // [disp32] in X86-32 mode
+ BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
+ return;
+ }
+
+ // If the base is not EBP/ESP and there is no displacement, use simple
+ // indirect register encoding, this handles addresses like [EAX]. The
+ // encoding for [EBP] with no displacement means [disp32] so we handle it
+ // by emitting a displacement of 0 below.
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (!DispForReloc && isDisp8(DispVal)) {
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ return;
+ }
+
+ // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
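+  // For example, a scale of 4 maps to SS == 2: the hardware multiplies the
+  // index register by 2^SS.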
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+ case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+ case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+ case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+ case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+ case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+ case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+ case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+ case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK)
+ MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ default: llvm_unreachable("Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
+ MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize)
+ MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize)
+ MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: llvm_unreachable("Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ // Handle REX prefix.
+ if (Is64BitMode) {
+ if (unsigned REX = determineREX(MI))
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default:
+ llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ llvm_unreachable("pseudo instructions should be removed before code"
+ " emission");
+ break;
+    // Do nothing for Int_MemBarrier - it's just a comment. Emit a debug
+    // message to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
+ case TargetOpcode::INLINEASM:
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0])
+ report_fatal_error("JIT does not support inline asm!");
+ break;
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm: {
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ break;
+ }
+
+ if (MO.isGlobal()) {
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0);
+ break;
+ }
+
+ if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
+ if (MO.isJTI()) {
+ emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
+ break;
+ }
+
+ assert(MO.isImm() && "Unknown RawFrm operand!");
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
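+      // The extra 4 accounts for the rel32 field itself: the target is
+      // encoded relative to the end of the 4-byte immediate.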
+ emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
+ } else
+ emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::AddRegFrm: {
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri64i32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp,
+ getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
+ .getReg()));
+ CurOp += X86::AddrNumOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86II::getSizeOfImm(Desc->TSFlags));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO1.isImm()) {
+ emitConstant(MO1.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
+ X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86::AddrNumOperands;
+
+ if (CurOp == NumOps)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
+ if (MO.isImm()) {
+ emitConstant(MO.getImm(), Size);
+ break;
+ }
+
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool Indirect = gvNeedsNonLazyPtr(MO, TM);
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+
+ case X86II::MRM_C1:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC1);
+ break;
+ case X86II::MRM_C8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC8);
+ break;
+ case X86II::MRM_C9:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC9);
+ break;
+ case X86II::MRM_E8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xE8);
+ break;
+ case X86II::MRM_F0:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xF0);
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+#ifndef NDEBUG
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 000000000000..f1d7edea7210
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,153 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+#include "X86Relocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the X86ELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+  : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
+  EMachine = is64Bit ? EM_X86_64 : EM_386;
+}
+
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ if (is64Bit) {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_X86_64_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_X86_64_32;
+ case X86::reloc_absolute_word_sext:
+ return ELF::R_X86_64_32S;
+ case X86::reloc_absolute_dword:
+ return ELF::R_X86_64_64;
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86_64 machine relocation type");
+ }
+ } else {
+ switch(MachineRelTy) {
+ case X86::reloc_pcrel_word:
+ return ELF::R_386_PC32;
+ case X86::reloc_absolute_word:
+ return ELF::R_386_32;
+ case X86::reloc_absolute_word_sext:
+ case X86::reloc_absolute_dword:
+ case X86::reloc_picrel_word:
+ default:
+ llvm_unreachable("unknown x86 machine relocation type");
+ }
+ }
+ return 0;
+}
+
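+// Note: for the PC-relative 32-bit relocation types the default addend
+// subtracts 4 because the rel32 field is resolved relative to the end of
+// the 4-byte field (see computeRelocation below).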
+long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ return 32;
+ case ELF::R_X86_64_64:
+ return 64;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
+ return 32;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ if (is64Bit) {
+ switch(RelTy) {
+ case ELF::R_X86_64_PC32:
+ return true;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
+ return false;
+ default:
+ llvm_unreachable("unknown x86_64 relocation type");
+ }
+ } else {
+ switch(RelTy) {
+ case ELF::R_386_PC32:
+ return true;
+ case ELF::R_386_32:
+ return false;
+ default:
+ llvm_unreachable("unknown x86 relocation type");
+ }
+ }
+ return 0;
+}
+
+unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return is64Bit ?
+ X86::reloc_absolute_dword : X86::reloc_absolute_word;
+}
+
+long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
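+  // With illustrative numbers: a symbol at offset 0x40 referenced from a
+  // rel32 field at offset 0x10 resolves to 0x40 - (0x10 + 4) = 0x2C.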
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
+ return SymOffset - (RelOffset + 4);
+ else
+    assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 000000000000..a45b5bb66a07
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,59 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+
+ public:
+ X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~X86ELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return is64Bit; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in bits.
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+    /// used to reference an absolute label, e.g. a jump table entry.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields can be relocated directly,
+    /// avoiding relocation symbol emission; compute the final relocation
+    /// value for such a symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 000000000000..21e163a30054
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,2133 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// StackPtr - Register used as the stack pointer.
+ ///
+ unsigned StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ }
+
+ virtual bool TargetSelectInstruction(const Instruction *I);
+
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+  /// try to fold the load as an operand to the instruction, returning true on
+  /// success.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
+
+ bool X86SelectLoad(const Instruction *I);
+
+ bool X86SelectStore(const Instruction *I);
+
+ bool X86SelectRet(const Instruction *I);
+
+ bool X86SelectCmp(const Instruction *I);
+
+ bool X86SelectZExt(const Instruction *I);
+
+ bool X86SelectBranch(const Instruction *I);
+
+ bool X86SelectShift(const Instruction *I);
+
+ bool X86SelectSelect(const Instruction *I);
+
+ bool X86SelectTrunc(const Instruction *I);
+
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
+
+ bool X86VisitIntrinsicCall(const IntrinsicInst &I);
+ bool X86SelectCall(const Instruction *I);
+
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(const Constant *C);
+
+ unsigned TargetMaterializeAlloca(const AllocaInst *C);
+
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
+ }
+
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ VT = evt.getSimpleVT();
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT
+/// from the address described by AM. Return true and the result register by
+/// reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT to the address described by AM (a base pointer plus displacement
+/// offset, or a GlobalAddress). Return true if it is possible.
+bool
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
+ Val = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ break;
+ }
+
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if ((int)CI->getSExtValue() == CI->getSExtValue())
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
+ .addImm(Signed ? (uint64_t) CI->getSExtValue() :
+ CI->getZExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::Alloca: {
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt<32>(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+      // An array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt<32>(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ if (X86SelectAddress(U->getOperand(0), AM))
+ return true;
+
+ // If we couldn't merge the gep value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+ break;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models or TLS yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectCallAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
+ }
+
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+    return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ // Now emit the RET.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ return true;
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
+ case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHSC as the RHS
+/// of the comparison, return an opcode that works for the compare (e.g.
+/// CMP32ri); otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
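+    // ucomiss/ucomisd sets ZF on equality and PF on an unordered (NaN)
+    // compare, so "ordered and equal" is computed as SETE AND SETNP.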
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
+ .addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison
+  // in the same block (values defined in other blocks may not have
+ // initialized registers).
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
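+        // A single jcc cannot test ZF and PF together, so invert OEQ to UNE,
+        // swap the successors, and let the two-branch UNE lowering below
+        // handle it.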
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
+ default:
+ return false;
+ }
+
+ const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
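+    // Only the low bit of the wider source carries the boolean value, so the
+    // test emitted below checks just bit 0.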
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+  // Note that i1 essentially gets ANY_EXTEND'ed to i8 when it isn't used in
+  // an explicit cast, so only the low bit is meaningful; test just that bit.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(const Instruction *I) {
+ unsigned CReg = 0, OpReg = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(32)) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(64)) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+  // We only use cmov here; if we don't have a cmov instruction, bail.
+ if (!Subtarget->hasCMov()) return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
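+  // TEST sets ZF when the condition is zero; the CMOVE below then replaces
+  // the true value (Op1) with the false value (Op2) in that case.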
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
+ // fpext from float to double.
+ if (Subtarget->hasSSE2() &&
+ I->getType()->isDoubleTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isFloatTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
+ if (Subtarget->hasSSE2()) {
+ if (I->getType()->isFloatTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isDoubleTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ return false;
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+    // If we're on x86-32, we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
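+    // (Only EAX/EBX/ECX/EDX have addressable 8-bit subregisters in 32-bit
+    // mode.)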
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ InputReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
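+  // For example, a 13-byte copy on x86-64 becomes an i64, an i32, and an i8
+  // load/store pair.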
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+    // Don't handle volatile memcpys. Copies that aren't small constant
+    // lengths are lowered to a library call below.
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
+ }
+ case Intrinsic::stackprotector: {
+    // Emit inline code to store the stack guard onto the stack.
+ EVT PtrTy = TLI.getPointerTy();
+
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ // Grab the frame index.
+ X86AddressMode AM;
+ if (!X86SelectAddress(Slot, AM)) return false;
+ if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ X86AddressMode AM;
+ assert(DI->getAddress() && "Null address should be checked earlier!");
+ if (!X86SelectAddress(DI->getAddress(), AM))
+ return false;
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction.
+ const Function *Callee = I.getCalledFunction();
+ const Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+    // The call to CreateRegs builds two sequential registers, to store
+    // both of the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
+
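+    // Unsigned overflow is reported in CF (SETB); signed overflow is reported
+    // in OF (SETO).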
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::X86SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Handle only C and fastcc calling conventions for now.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
+ return false;
+
+ // Fast-isel doesn't know about callee-pop yet.
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ GuaranteedTailCallOpt))
+ return false;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = 0;
+ if (CalleeAM.GV != 0) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
+ // Deal with call operands first.
+ SmallVector<const Value *, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgVals.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+    // last two arguments, which should not be passed to the underlying
+    // functions.
+ if (MemIntName && e-i <= 2)
+ break;
+ Value *ArgVal = *i;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ const PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
+ const Type *ArgTy = ArgVal->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64())
+ CCInfo.AllocateStack(32, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
+
+  // Process the arguments: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = StackPtr;
+ AM.Disp = LocMemOffset;
+ const Value *ArgVal = ArgVals[VA.getValNo()];
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+        // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ X86FastEmitStore(ArgVT, ArgVal, AM);
+ } else {
+ X86FastEmitStore(ArgVT, Arg, AM);
+ }
+ }
+ }
+
+  // ELF / PIC requires the GOT pointer to be in EBX before making function
+  // calls via the PLT.
+ if (Subtarget->isPICStyleGOT()) {
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
+ }
+
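+  // The x86-64 SysV calling convention expects AL to hold (an upper bound on)
+  // the number of XMM registers used by a varargs call.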
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
+ // Issue the call.
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
+
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+    // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ }
+
+  // Add an implicit use of the GOT pointer in EBX.
+ if (Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX);
+
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(NumBytesCallee);
+
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
+
+ // Now handle call return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
+ }
+
+ if (CopyVT != RVLocs[i].getValVT()) {
+      // Round the F80 value to the right size, which also moves it to the
+      // appropriate xmm register. This is accomplished by storing the F80
+      // value in memory and
+ // then loading it back. Ewww...
+ EVT ResVT = RVLocs[i].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg + i), FI);
+ }
+ }
+
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+ return false;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ unsigned char OpFlag = 0;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
+ }
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
+ MCPOffset, PICBase, OpFlag);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+  // X86SelectAddress, and TargetMaterializeAlloca.
+ if (!FuncInfo.StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true on
+/// success.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
+namespace llvm {
+ llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ return new X86FastISel(funcInfo);
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
new file mode 100644
index 000000000000..17d242ab761e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
@@ -0,0 +1,33 @@
+//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 000000000000..6eed6abd43e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1709 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// pseudo registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
+//
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
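+    //
+    // For example, if FP0 and FP2 are live across a bundle's edges, Mask is
+    // 0b101; once the order is fixed, FixStack[0] and FixStack[1] record
+    // which of them sits in st(0) and st(1).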
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle() : Mask(0), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
+ MachineBasicBlock *MBB; // Current basic block
+
+ // The hardware keeps track of how many FP registers are live, so we have
+ // to model that exactly. Usually, each live register corresponds to an
+ // FP<n> register, but when dealing with calls, returns, and inline
+    // assembly, it is sometimes necessary to have live scratch registers.
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned StackTop; // The current top of the FP stack.
+
+ enum {
+ NumFPRegs = 16 // Including scratch pseudo-registers.
+ };
+
+ // For each live FP<n> register, point to its Stack[] entry.
+ // The first entries correspond to FP0-FP6, the rest are scratch registers
+ // used when we need slightly different live registers than what the
+ // register allocator thinks.
+ unsigned RegMap[NumFPRegs];
+
+ // Pending fixed registers - Inline assembly needs FP registers to appear
+ // in fixed stack slot positions. This is handled by copying FP registers
+ // to ST registers before the instruction, and copying back after the
+ // instruction.
+ //
+ // This is modeled with pending ST registers. NumPendingSTs is the number
+ // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+ // register that holds the ST value. The ST registers are not moved into
+ // place until immediately before the instruction that needs them.
+ //
+ // It can happen that we need an ST register to be live when no FP register
+ // holds the value:
+ //
+ // %ST0 = COPY %FP4<kill>
+ //
+ // When that happens, we allocate a scratch FP register to hold the ST
+ // value. That means every register in PendingST must be live.
+
+ unsigned NumPendingSTs;
+ unsigned char PendingST[8];
+
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
+ void dumpStack() const {
+ dbgs() << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ dbgs() << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ for (unsigned i = 0; i != NumPendingSTs; ++i)
+ dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
+ dbgs() << "\n";
+ }
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < NumFPRegs && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = NumFPRegs - 1; i >= 8; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+    /// isScratchReg - Returns true if RegNo is a scratch FP register.
+ bool isScratchReg(unsigned RegNo) {
+      return RegNo >= 8 && RegNo < NumFPRegs;
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
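+      // st(i) counts down from the top of the stack: the register in slot
+      // StackTop-1 is st(0).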
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < NumFPRegs && "Register number out of range!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+      // Emit an fxch to update the runtime processor's version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ ++NumFXCH;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+    /// currently at the top of the stack, we just pop it; otherwise we store
+    /// the current top-of-stack into the specified slot and
+ /// then pop the top of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ // Check if a COPY instruction is using FP registers.
+ bool isFPCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ return X86::RFP80RegClass.contains(DstReg) ||
+ X86::RFP80RegClass.contains(SrcReg);
+ }
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ Bundles = &getAnalysis<EdgeBundles>();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ // Process any unreachable blocks in arbitrary order now.
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ LiveBundles.clear();
+
+ return Changed;
+}
+
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ LiveBundles.resize(Bundles->getNumBundles());
+
+ // Gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+    // Update the MBB's incoming bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ }
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+ NumPendingSTs = 0;
+
+ setupBlockStack();
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ uint64_t Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
+ if (MI->isInlineAsm())
+ FPInstClass = X86II::SpecialFP;
+
+ if (MI->isCopy() && isFPCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: llvm_unreachable("Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+    // Print out all of the instructions expanded to, if -debug is enabled.
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ dbgs() << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
+ while (++Start != llvm::next(I)) {}
+ }
+ dumpStack();
+ );
+
+ Changed = true;
+ }
+
+ finishBlockStack();
+
+ return Changed;
+}
+
+/// setupBlockStack - Use the live bundles to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the expected fixed stack of any
+/// predecessors, and ensure that all predecessors are expecting the same
+/// stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
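+ // TableEntry - A single mapping from a pseudo opcode ('from') to its concrete
+ // replacement ('to'). Tables of these entries are kept sorted by 'from' so
+ // they can be searched with std::lower_bound.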
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+ const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
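+// Lookup - Binary search a sorted TableEntry array for Opcode. Returns the
+// mapped value, or -1 if Opcode is not present.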
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is a register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
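+// getConcreteOpcode - Translate a register-file FP pseudo opcode into the
+// concrete stack-form opcode using OpcodeTable above.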
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
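+ // The popping FUCOMPP implicitly compares ST(0) with ST(1) and takes no
+ // operands, so drop the leftover register operand.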
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the value after the current instruction; otherwise
+/// we store the current top-of-stack into the specified slot, then pop the top
+/// of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ while (StackTop) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ if (FixCount > 0)
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ // FISTP64m is strange because there isn't a non-popping version.
+ // If we have one _and_ we don't want to pop the operand, duplicate the value
+ // on the stack instead of moving it. This ensures that popping the value is
+ // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
+ duplicateToTop(Reg, getScratchReg(), I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
+/// instructions which need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
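+ // All four instruction tables have the same number of entries, so the length
+ // of ForwardST0Table is valid for whichever table was selected above.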
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of the stack; the other one can
+ // be anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SpecialFP instruction!");
+ case TargetOpcode::COPY: {
+ // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+ const MachineOperand &MO1 = MI->getOperand(1);
+ const MachineOperand &MO0 = MI->getOperand(0);
+ unsigned DstST = MO0.getReg() - X86::ST0;
+ unsigned SrcST = MO1.getReg() - X86::ST0;
+ bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+ // ST = COPY FP. Set up a pending ST register.
+ if (DstST < 8) {
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+ // Unallocated STs are marked as the nonexistent FP255.
+ while (NumPendingSTs <= DstST)
+ PendingST[NumPendingSTs++] = NumFPRegs;
+
+ // STi could still be live from a previous inline asm.
+ if (isScratchReg(PendingST[DstST])) {
+ DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+ << '\n');
+ freeStackSlotBefore(MI, PendingST[DstST]);
+ }
+
+ // When the source is killed, allocate a scratch FP register.
+ if (KillsSrc) {
+ unsigned Slot = getSlot(SrcFP);
+ unsigned SR = getScratchReg();
+ PendingST[DstST] = SR;
+ Stack[Slot] = SR;
+ RegMap[SR] = Slot;
+ } else
+ PendingST[DstST] = SrcFP;
+ break;
+ }
+
+ // FP = COPY ST. Extract fixed stack value.
+ // Any instruction defining ST registers must have assigned them to a
+ // scratch register.
+ if (SrcST < 8) {
+ unsigned DstFP = getFPReg(MO0);
+ assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+ assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+ unsigned SrcFP = PendingST[SrcST];
+ assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+ assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+ // DstFP steals the stack slot from SrcFP.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+
+ // Always treat the ST as killed.
+ PendingST[SrcST] = NumFPRegs;
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+ break;
+ }
+
+ // FP <- FP copy.
+ unsigned DstFP = getFPReg(MO0);
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ if (KillsSrc) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+ } else {
+ // For COPY we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcFP, DstFP, I);
+ }
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
+ }
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
+ break;
+ }
+
+ case TargetOpcode::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
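+ //
+ // For example, a GCC-style asm using the x87 register constraints:
+ //
+ //   asm("fsincos" : "=t"(c), "=u"(s) : "0"(x));
+ //
+ // has one popped input (x, tied to output 0) and two ST outputs (c in ST(0),
+ // s in ST(1)): the stack effect is as if the input were popped and both
+ // results pushed.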
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill after the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
+ unsigned FPReg = getFPReg(Op);
+ FPUsed |= 1U << FPReg;
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are in the same register, create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;
+ }
+ assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+ // Now we can rearrange the live registers to match what was requested.
+ shuffleStackTop(PendingST, NumPendingSTs, I);
+ DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+ // With the stack layout fixed, rewrite the FP registers.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+ }
+
+ // Simulate the inline asm popping its inputs and pushing its outputs.
+ StackTop -= NumSTPopped;
+
+ // Hold the fixed output registers in scratch FP registers. They will be
+ // transferred to real FP registers by copies.
+ NumPendingSTs = 0;
+ for (unsigned i = 0; i < NumSTDefs; ++i) {
+ unsigned SR = getScratchReg();
+ pushReg(SR);
+ FPKills &= ~(1u << SR);
+ }
+ for (unsigned i = 0; i < NumSTDefs; ++i)
+ PendingST[NumPendingSTs++] = getStackEntry(i);
+ DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+ // If any of the ST defs were dead, pop them immediately. Our caller only
+ // handles dead FP defs.
+ MachineBasicBlock::iterator InsertPt = MI;
+ for (unsigned i = 0; STDefs & (1u << i); ++i) {
+ if (!(STDeadDefs & (1u << i)))
+ continue;
+ freeStackSlotAfter(InsertPt, PendingST[i]);
+ PendingST[i] = NumFPRegs;
+ }
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ while (FPKills) {
+ unsigned FPReg = CountTrailingZeros_32(FPKills);
+ if (isLive(FPReg))
+ freeStackSlotAfter(InsertPt, FPReg);
+ FPKills &= ~(1U << FPReg);
+ }
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+ unsigned LiveMask = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+ LiveMask |= (1 << getFPReg(Op));
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = getScratchReg();
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ /// Okay we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+ /// 3) SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently in
+ /// ST(1). In this case, emit an fxch.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ /// ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 000000000000..ed45a9a4c1c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,1210 @@
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
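+/// hasReservedCallFrame - The outgoing argument area is treated as part of the
+/// fixed stack frame unless the function has variable sized objects.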
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
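+// getSUBriOpcode / getADDriOpcode - Pick the SUB/ADD-with-immediate opcode with
+// the smallest encoding (8-bit or 32-bit immediate) that can hold Imm.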
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worrying about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
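+ // Adjust the stack in chunks small enough to fit in a signed 32-bit immediate.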
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - If the instruction immediately before the iterator
+/// adjusts the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - If the instruction immediately after the iterator
+/// adjusts the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Check the instruction before/after the passed
+/// instruction. If it is an ADD/SUB of the stack pointer, it is deleted and the
+/// stack adjustment is returned as a positive value for ADD and a negative
+/// value for SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = -TD->getPointerSize();
+
+ // FIXME: This is a dirty hack. The code itself is a mess right now.
+ // It should be rewritten from scratch and generalized at some point.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ Fn->needsUnwindTableEntry();
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsWin64 = STI.isTargetWin64();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we also need to know the ABI stack alignment in case we have a call
+ // out. Otherwise just make sure we have some alignment - we'll go with the
+ // minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64, the Red Zone is not disabled, we are a leaf function
+ // using no more than 128 bytes of stack space, and we have no frame
+ // pointer, calls, or dynamic allocas, then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
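+ // For illustration (assuming no callee-saved registers and no frame
+ // pointer): a 120-byte frame shrinks to 0 bytes, i.e. it fits entirely in
+ // the red zone, while a 200-byte frame shrinks to 72 bytes.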
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64) { // Win64 has no Red Zone
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the caller's.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
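+ //
+ // For example, the move emitted below with DST == VirtualFP and
+ // SRC == (VirtualFP, 2 * stackGrowth) falls under the first rule and is
+ // emitted as DW_CFA_def_cfa_offset.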
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -TD->getPointerSize();
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (RegInfo->needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is a SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
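+ // For illustration, on a 32-bit MSVC-style target the code below emits
+ // roughly:
+ //   movl $NumBytes, %eax
+ //   calll _chkstk           # probes and adjusts %esp
+ // whereas 64-bit MSVC targets call __chkstk and adjust %rsp afterwards.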
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ if (isEAXAlive) {
+ // Sanity check: in the 64-bit case EAX must not be live-in, because RAX is
+ // clobbered below to pass the allocation size to __chkstk.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+ }
+
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes);
+ } else {
+ // If EAX is live, allocate NumBytes-4 bytes on the stack; together with
+ // the 4 bytes already used to push EAX this covers the full NumBytes.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // MSVC x64's __chkstk only probes the stack; we still need to adjust %rsp.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (((!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we also need to know the ABI stack alignment in case we have a call
+ // out. Otherwise just make sure we have some alignment - we'll go with the
+ // minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic allocas are used, reset ESP to point to the last callee-saved
+ // slot before popping the callee-saved registers off. The same applies when
+ // the stack was realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+ // We cannot use LEA here because the stack pointer was realigned; we need
+ // to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from the function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ // Push GPRs. This increases the frame size.
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86FrameLowering::emitPrologue will handle spilling of the frame register.
+ continue;
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Spill the XMM registers. X86 has no push/pop instructions for XMM
+ // registers, so they are spilled to the stack frame instead.
+ // Note that only the Win64 ABI might spill XMMs here.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ if (Reg == FPReg)
+ // X86FrameLowering::emitEpilogue will handle restoring the frame register.
+ continue;
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ bool Used[7] = { false, false, false, false, false, false, false };
+ uint32_t RenumRegs[6];
+ for (unsigned I = 0; I < SavedCount; ++I) {
+ uint32_t Renum = 0;
+ for (unsigned U = 1; U < 7; ++U) {
+ if (U == Registers[I])
+ break;
+ if (!Used[U])
+ ++Renum;
+ }
+
+ Used[Registers[I]] = true;
+ RenumRegs[I] = Renum;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (SavedCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
+ + 3 * RenumRegs[2] + RenumRegs[3];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
+ + RenumRegs[2];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[0];
+ break;
+ }
+
+ return permutationEncoding;
+}
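+// For illustration, tracing the loop above on the {6, 2, 4, 5} example: each
+// register is assigned the count of still-unused lower-numbered registers,
+// giving RenumRegs = {5, 1, 2, 2}; with SavedCount == 4 the resulting
+// encoding is 60*5 + 12*1 + 3*2 + 2 == 320.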
+
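+// As computed below, the returned encoding packs the stack size (in 4-byte
+// units) into bits 16-23 and then either sets 0x01000000 for an EBP/RBP-based
+// frame, with the saved registers in 3-bit fields starting at bit 0, or sets
+// 0x02000000 for a frameless frame, with the register count in bits 10-12 and
+// the permutation encoding from permuteEncode in bits 0-9. A return value of
+// 0 means the frame cannot be described compactly.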
+uint32_t X86FrameLowering::
+getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const {
+ uint32_t Encoding = 0;
+ int CFAOffset = 0;
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
+ int FramePointerReg = -1;
+
+ for (ArrayRef<MCCFIInstruction>::const_iterator
+ I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ const MCCFIInstruction &Inst = *I;
+ MCSymbol *Label = Inst.getLabel();
+
+ // Ignore invalid labels.
+ if (Label && !Label->isDefined()) continue;
+
+ unsigned Operation = Inst.getOperation();
+ if (Operation != MCCFIInstruction::Move &&
+ Operation != MCCFIInstruction::RelMove)
+ // FIXME: We can't handle this frame just yet.
+ return 0;
+
+ const MachineLocation &Dst = Inst.getDestination();
+ const MachineLocation &Src = Inst.getSource();
+ const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
+
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() != MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa
+ assert(FramePointerReg == -1 &&"Defining more than one frame pointer?");
+ if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+ FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
+ } // else DW_CFA_def_cfa_offset
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset -= Src.getOffset();
+
+ continue;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa_register
+ assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+
+ if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+
+ FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
+ if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
+ return 0;
+
+ SavedRegs[0] = 0;
+ SavedRegIdx = 0;
+ continue;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset /= DataAlignmentFactor;
+
+ if (Offset < 0) {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended_sf
+ return 0;
+ } else if (Reg < 64) {
+ // DW_CFA_offset + Reg
+ if (SavedRegIdx >= 6) return 0;
+ int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
+ if (CURegNum == -1) return 0;
+ SavedRegs[SavedRegIdx++] = CURegNum;
+ } else {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended
+ return 0;
+ }
+ }
+
+ // Bail if there are too many registers to encode.
+ if (SavedRegIdx > 6) return 0;
+
+ // Check if the offset is too big.
+ CFAOffset /= 4;
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ return 0;
+ Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
+
+ if (FramePointerReg != -1) {
+ Encoding |= 0x01000000; // EBP/RBP Unwind Frame
+ for (unsigned I = 0; I != SavedRegIdx; ++I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == unsigned(FramePointerReg)) continue;
+ Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
+ }
+ } else {
+ Encoding |= 0x02000000; // Frameless unwind with small stack
+ Encoding |= (SavedRegIdx & 0x7) << 10;
+ Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ }
+
+ return Encoding;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 000000000000..14c31ed47cf1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,69 @@
+//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the X86-specific bits of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
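+ // The TargetFrameLowering base class is configured here with a
+ // downward-growing stack, the subtarget's stack alignment, and a local area
+ // offset of -8 (-4 on 32-bit) to account for the pushed return address.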
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..2b0f283bec75
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,2253 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized DAG to an X86 DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValues instead of register numbers for the leaves of the matched
+ /// tree.
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
+
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+ unsigned char SymbolFlags; // X86II::MO_*
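+
+ // Taken together, these fields describe an x86 memory operand of the form
+ // Segment:[Base_Reg/Base_FrameIndex + Scale*IndexReg + Disp], where the
+ // displacement may be a constant or a symbolic reference (GV, CP, ES, JT
+ // or BlockAddr).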
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
+ SymbolFlags(X86II::MO_NO_FLAG) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
+ }
+
+ bool hasBaseOrIndexReg() const {
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ }
+
+ /// isRIPRelative - Return true if this addressing mode is already RIP
+ /// relative.
+ bool isRIPRelative() const {
+ if (BaseType != RegBase) return false;
+ if (RegisterSDNode *RegNode =
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
+ return RegNode->getReg() == X86::RIP;
+ return false;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base_Reg = Reg;
+ }
+
+ void dump() {
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
+ << " Scale" << Scale << '\n'
+ << "IndexReg ";
+ if (IndexReg.getNode() != 0)
+ IndexReg.getNode()->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
+ << "GV ";
+ if (GV)
+ GV->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << " CP ";
+ if (CP)
+ CP->dump();
+ else
+ dbgs() << "nul";
+ dbgs() << '\n'
+ << "ES ";
+ if (ES)
+ dbgs() << ES;
+ else
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class X86DAGToDAGISel : public SelectionDAGISel {
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ const X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ X86Lowering(*tm.getTargetLowering()),
+ Subtarget(&tm.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ virtual void EmitFunctionEntryCode();
+
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+ virtual void PreprocessISelDAG();
+
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDNode *N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &NodeWithChain);
+
+ bool TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
+ AM.SymbolFlags);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp, AM.SymbolFlags);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, AM.SymbolFlags);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const X86TargetMachine &getTargetMachine() {
+ return static_cast<const X86TargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const X86InstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+ };
+}
+
+
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
+ switch (U->getOpcode()) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+ // If the other operand is an 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+ // The former is 2 bytes shorter. In the case where the increment is 1,
+ // the saving can be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // If the block also has an access to a second TLS address, this will save
+ // a load.
+ // FIXME: This is probably also true for non-TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// MoveBelowOrigChain - Replace the original chain operand of the call with
+/// the load's chain operand and move the load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue OrigChain) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = OrigChain.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+ Ops.push_back(OrigChain.getOperand(i));
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (!Chain.getNumOperands())
+ return false;
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+ Callee.getValue(1).hasOneUse())
+ return true;
+ return false;
+}
+
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None &&
+ (N->getOpcode() == X86ISD::CALL ||
+ N->getOpcode() == X86ISD::TC_RETURN)) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
+ if (!isCalleeLoad(Load, Chain, HasCallSeq))
+ continue;
+ MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ // Lower fpround and fpextend nodes that target the FP stack to be a store
+ // and a load to/from the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
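+ // For example, an FP_ROUND from x87 f80 to f32 is rewritten below as a
+ // 4-byte truncating store to a stack temporary followed by a reload of the
+ // result.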
+ EVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, MachinePointerInfo(), MemVT,
+ false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havoc on the DAG because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
+
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ BuildMI(BB, DebugLoc(),
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
+ // If this is main, emit special code for main.
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
+}
+
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+
+}
+
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode. These wrap things that will resolve down into a
+/// symbol reference. If no match is possible, this returns true, otherwise it
+/// returns false.
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
+ return true;
+
+ SDValue N0 = N.getOperand(0);
+ CodeModel::Model M = TM.getCodeModel();
+
+ // Handle X86-64 rip-relative addresses. We check this before checking direct
+ // folding because RIP is preferable to non-RIP accesses.
+ if (Subtarget->is64Bit() &&
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+ // they cannot be folded into immediate fields.
+ // FIXME: This can be improved for kernel and other models?
+ (M == CodeModel::Small || M == CodeModel::Kernel) &&
+ // Base and index reg must be 0 in order to use %rip as base and lowering
+ // must allow RIP.
+ !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.GV = G->getGlobal();
+ AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+
+ if (N.getOpcode() == X86ISD::WrapperRIP)
+ AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
+ return false;
+ }
+
+ // Handle the case when globals fit in our immediate field: This is true for
+ // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
+ // mode, this results in a non-RIP-relative computation.
+ if (!Subtarget->is64Bit() ||
+ ((M == CodeModel::Small || M == CodeModel::Kernel) &&
+ TM.getRelocationModel() == Reloc::Static)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (MatchAddressRecursively(N, AM, 0))
+ return true;
+
+ // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
+ // a smaller encoding and avoids a scaled-index.
+ if (AM.Scale == 2 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
+ AM.Scale = 1;
+ }
+
+ // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
+ // because it has a smaller encoding.
+ // TODO: Which other code models can use this?
+ if (TM.getCodeModel() == CodeModel::Small &&
+ Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+
+ return false;
+}
+
+bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ dbgs() << "MatchAddress: ";
+ AM.dump();
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // If this is already a %rip relative address, we can only merge immediates
+ // into it. Instead of handling this in every case, we handle it here.
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRelative()) {
+ // FIXME: JumpTable and ExternalSymbol addresses currently don't like
+ // displacements. It isn't very important, but this should be fixed for
+ // consistency.
+ if (!AM.ES && AM.JT != -1) return true;
+
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ return false;
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!FoldOffsetIntoAddress(Val, AM))
+ return false;
+ break;
+ }
+
+ case X86ISD::Wrapper:
+ case X86ISD::WrapperRIP:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
+ // that the base operand remains free for further matching. If
+ // the base doesn't end up getting used, a post-processing step
+ // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
+ }
+
+ AM.IndexReg = ShVal;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base_Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address and the
+ // index field is unused, use -B as the index. This is a win if A has
+ // multiple parts that can be folded into the address. It also saves a
+ // mov if the base register has other uses, since it avoids a two-address
+ // sub instruction; however, it costs an additional mov if the index
+ // register has other uses.
+
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
+ AM = Backup;
+ break;
+ }
+
+ int Cost = 0;
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
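+ // Try to fold both operands into the addressing mode, starting with the LHS.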
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
+
+ // Start with the LHS as an addr mode.
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
+ return false;
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getNumOperands() != 2) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue X = Shift.getOperand(0);
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 4 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+ SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides, the
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getZExtValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask, this folds to a constant.
+ SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ SDValue(C2, 0), SDValue(C1, 0));
+ SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+ NewANDMask);
+ SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ NewAND, SDValue(C1, 0));
+
+ // Insert the new nodes into the topological ordering.
+ if (C1->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), C1);
+ C1->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewANDMask.getNode()->getNodeId() == -1 ||
+ NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+ NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewAND.getNode()->getNodeId() == -1 ||
+ NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+ NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (NewSHIFT.getNode()->getNodeId() == -1 ||
+ NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+ NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base_Reg = N;
+ return false;
+}
+
+/// SelectAddr - returns true if it is able to pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
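+ // Note that MatchAddress returns true when it *fails* to match, in which
+ // case this address cannot be selected.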
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+///
+/// We also return:
+/// PatternNodeWithChain: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &PatternNodeWithChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ PatternNodeWithChain = SDValue(LD, 0);
+ return true;
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - Calls MatchAddress and determines whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LEA
+/// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ EVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base_Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or to
+ // use a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+ // to a LEA. This is determined with some experimentation but is by no means
+ // optimal (especially for code size consideration). LEA is nice because of
+ // its three-address nature. Tweak the cost function again when we can run
+ // convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
+ // If it isn't worth using an LEA, reject it.
+ if (Complexity <= 2)
+ return false;
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.SymbolFlags = GA->getTargetFlags();
+
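+ // In 32-bit mode the TLS address computation needs the GOT base register
+ // (EBX) as the index with scale 1; in 64-bit mode no index register is used.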
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
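+/// TryFoldLoad - Return true if the load N can be folded into node P as a
+/// memory operand, filling in the address operands (Base, Scale, Index, Disp,
+/// Segment) for the folded access.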
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P, OptLevel))
+ return false;
+
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
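+/// SelectAtomic64 - Select one of the ATOM*6432 pseudo instructions for a
+/// 64-bit atomic operation whose value operand is split into two 32-bit
+/// halves (In2L, In2H).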
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
+
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us to use
+ // the "lock" versions of the add, sub, inc, and dec instructions.
+ // FIXME: Do not use special instructions; instead add the "lock" prefix to
+ // the target node somehow. The extra information will then be transferred
+ // to the machine instruction, where it denotes the prefix.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
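+ // The value result (result 0) is known to be unused (checked above), so
+ // return an IMPLICIT_DEF for it and pair it with the chain produced by the
+ // locked instruction.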
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
+
+enum AtomicOpc {
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
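+// Opcode table used by SelectAtomicLoadArith: rows are indexed by AtomicOpc
+// and columns by AtomicSz, so the entries below must stay in the same order
+// as those enums.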
+static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr
+ }
+};
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ // FIXME: Same as for 'add' and 'sub', try to merge those down here.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ default:
+ return 0;
+ }
+
+ bool isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
+ isCN = true;
+ Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ Opc = AtomicOpcTbl[Op][I64];
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ }
+ break;
+ }
+
+ assert(Opc != 0 && "Invalid arith lock transform!");
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
+/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
+/// any uses which require the SF or OF bits to be accurate.
+static bool HasNoSignedComparisonUses(SDNode *N) {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ // Only examine CopyToReg uses.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
+ X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1) continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the opcode of the user.
+ switch (FlagUI->getMachineOpcode()) {
+ // These comparisons don't treat the most significant bit specially.
+ case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
+ case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
+ case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
+ case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+ case X86::CMOVA16rr: case X86::CMOVA16rm:
+ case X86::CMOVA32rr: case X86::CMOVA32rm:
+ case X86::CMOVA64rr: case X86::CMOVA64rm:
+ case X86::CMOVAE16rr: case X86::CMOVAE16rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE64rr: case X86::CMOVAE64rm:
+ case X86::CMOVB16rr: case X86::CMOVB16rm:
+ case X86::CMOVB32rr: case X86::CMOVB32rm:
+ case X86::CMOVB64rr: case X86::CMOVB64rm:
+ case X86::CMOVBE16rr: case X86::CMOVBE16rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE64rr: case X86::CMOVBE64rm:
+ case X86::CMOVE16rr: case X86::CMOVE16rm:
+ case X86::CMOVE32rr: case X86::CMOVE32rm:
+ case X86::CMOVE64rr: case X86::CMOVE64rm:
+ case X86::CMOVNE16rr: case X86::CMOVNE16rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE64rr: case X86::CMOVNE64rm:
+ case X86::CMOVNP16rr: case X86::CMOVNP16rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP64rr: case X86::CMOVNP64rm:
+ case X86::CMOVP16rr: case X86::CMOVP16rm:
+ case X86::CMOVP32rr: case X86::CMOVP32rm:
+ case X86::CMOVP64rr: case X86::CMOVP64rm:
+ continue;
+ // Anything else: assume conservatively.
+ default: return false;
+ }
+ }
+ }
+ return true;
+}
+
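+/// Select - Select instructions for the given node. Returns the selected
+/// machine node, or null if the node was already selected or its uses were
+/// replaced in place.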
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
+ EVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable; i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR; AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
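+ // MUL takes one operand implicitly in AL/AX/EAX/RAX, so copy N0 into the
+ // low register and glue it to the multiply.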
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the low half of the result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned) {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ } else {
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+ }
+
+ unsigned LoReg, HiReg, ClrReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; ClrReg = HiReg = X86::AH;
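+ // No separate clear opcode is needed for i8: the paths below either
+ // zero-extend into AX with MOVZX or sign-extend with CBW, so ClrOpcode is
+ // never used for this type.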
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
+ }
+
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
+ // Copy the division (low) result, if it is needed.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ }
+ return NULL;
+ }
+
+ case X86ISD::CMP: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
+ if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0.getValueType() != MVT::i8 &&
+ X86::isZeroNode(N1)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
+ if (!C) break;
+
+ // For example, convert "testl %eax, $8" to "testb %al, $8"
+ if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
+ (!(C->getZExtValue() & 0x80) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // On x86-32, only the ABCD registers have 8-bit subregisters.
+ if (!Subtarget->is64Bit()) {
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+ }
+
+ // Extract the l-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb.
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testl %eax, $2048" to "testb %ah, $8".
+ if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ // Shift the immediate right by 8 bits.
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
+ MVT::i8);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Put the value in an ABCD register.
+ TargetRegisterClass *TRC = 0;
+ switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
+ case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
+ case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
+ default: llvm_unreachable("Unsupported TEST operand type!");
+ }
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
+ Reg.getValueType(), Reg, RC), 0);
+
+ // Extract the h-register.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
+ MVT::i8, Reg);
+
+ // Emit a testb. No special NOREX tricks are needed since there's
+ // only one GPR operand!
+ return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, ShiftedImm);
+ }
+
+ // For example, "testl %eax, $32776" to "testw %ax, $32776".
+ if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
+ N0.getValueType() != MVT::i16 &&
+ (!(C->getZExtValue() & 0x8000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 16-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
+ MVT::i16, Reg);
+
+ // Emit a testw.
+ return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ }
+
+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
+ if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
+ N0.getValueType() == MVT::i64 &&
+ (!(C->getZExtValue() & 0x80000000) ||
+ HasNoSignedComparisonUses(Node))) {
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Reg = N0.getNode()->getOperand(0);
+
+ // Extract the 32-bit subregister.
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
+ MVT::i32, Reg);
+
+ // Emit a testl.
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ }
+ }
+ break;
+ }
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
+
+ return ResNode;
+}
+
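+/// SelectInlineAsmMemoryOperand - Select the address operands for an inline
+/// asm memory constraint. Returns false on success, with the five address
+/// operands pushed onto OutOps.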
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into an
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 000000000000..5096d9ae2edf
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,13157 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86TargetMachine.h"
+#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2);
+
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Generate a DAG to put 128 bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Given two vectors, concat them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+ DebugLoc dl = Lower.getDebugLoc();
+
+ assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(),
+ Lower.getValueType().getVectorElementType(),
+ Lower.getValueType().getVectorNumElements() * 2);
+
+ // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+ assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+ // Insert the upper subvector.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+ DAG.getConstant(
+ // This is half the length of the result
+ // vector. Start inserting the upper 128
+ // bits here.
+ Lower.getValueType().getVectorNumElements(),
+ MVT::i32),
+ DAG, dl);
+
+ // Insert the lower subvector.
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Vec;
+}
+
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
+ }
+
+ if (Subtarget->isTargetELF()) {
+ if (is64Bit)
+ return new X8664_ELFTargetObjectFile(TM);
+ return new X8632_ELFTargetObjectFile(TM);
+ }
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ return new TargetLoweringObjectFileCOFF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getTargetData();
+
+ // Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
+
+ // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // For 64-bit, since we have so many registers, use the ILP scheduler; for
+ // 32-bit code, use the register-pressure-specific scheduling.
+ if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+    // MS runtime is weird: it exports _setjmp, but longjmp (no underscore)!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ } else if (!UseSoftFloat) {
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!UseSoftFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+  // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
+  // are Legal; f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+  // TODO: When we have SSE, these could be more efficient by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ }
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+    // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
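+  // For example, a function computing both x/y and x%y should end up with a
+  // single hardware divide, roughly:
+  //   int q = x / y;   // idivl %ecx -> quotient in EAX
+  //   int r = x % y;   //             remainder is already in EDX
+  // (illustrative C and register names; the exact instruction depends on
+  // operand width and signedness).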
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i8 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
+ }
+  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasXMM())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
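+  // Roughly speaking, a sequence such as
+  //   fence seq_cst; %old = atomicrmw add i32* %p, i32 1 seq_cst; fence seq_cst
+  // can be emitted as a single "lock addl"/"lock xaddl", since the locked
+  // instruction already provides the full barrier and the surrounding fences
+  // are simply dropped (illustrative IR).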
+
+ // Expand certain atomics
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
+
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC,
+ (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
+ (Subtarget->isTargetCOFF()
+ && !Subtarget->isTargetEnvMacho()
+ ? Custom : Expand));
+
+ if (!UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ } else if (!UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ // Long double always uses X87.
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+  // FIXME: In order to prevent SSE instructions from being expanded to MMX
+  // ones with -msoft-float, disable use of MMX as well.
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+    // No operations on x86mmx are supported; everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+
+    // FIXME: Unfortunately -soft-float and -no-implicit-float mean XMM
+    // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ EVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ // Do not attempt to custom lower non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ VT.getSimpleVT().SimpleTy, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ }
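+    // In practice the promotion above means, for example, that an "and" of
+    // two v4i32 values is bitcast to v2i64, performed there (a single pand),
+    // and bitcast back, so only the v2i64 patterns for the logic ops are
+    // needed (illustrative description of the Promote action for vectors).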
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ }
+
+ if (Subtarget->hasSSE41()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
+    // i8 and i16 vectors are custom because the source register and source
+    // memory operand types are not the same width. f32 vectors are custom
+    // since the immediate controlling the insert encodes additional
+    // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
+
+ if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ }
+
+ if (Subtarget->hasSSE42())
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+
+ if (!UseSoftFloat && Subtarget->hasAVX()) {
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+
+ // Custom lower several nodes for 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ }
+
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
+ continue;
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ }
+ }
+
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
+ }
+
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+
+ computeRegisterProperties();
+
+  // On Darwin, -Os means optimize for size without hurting performance, so
+  // do not reduce the limit.
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
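+  // The limits above cap how many scalar/vector stores an inlined memory
+  // intrinsic may expand to; e.g. with a 16-store memset budget and 16-byte
+  // SSE stores, a 64-byte @llvm.memset needs only 4 stores and can be emitted
+  // inline rather than as a libcall (illustrative sizes).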
+ setPrefLoopAlignment(16);
+ benefitFromCodePlacementOpt = true;
+
+ setPrefFunctionAlignment(4);
+}
+
+
+MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i8;
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+ return;
+}
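+// For example, a 32-bit caller passing "struct S { <4 x float> V; int I; }"
+// by value gets MaxAlign bumped to 16 because of the 128-bit vector member,
+// while a struct of plain ints keeps the default 4-byte alignment
+// (illustrative type).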
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasXMM())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ const Function *F = MF.getFunction();
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (!MemcpyStrSrc && Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasXMMInt()) {
+      // Do not use f64 to lower memcpy if the source is a string constant.
+      // It's better to use i32 to avoid the loads.
+ return MVT::f64;
+ }
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
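+// As an illustrative case (assuming vector stores are permitted, i.e.
+// NonScalarIntSafe and no NoImplicitFloat attribute): a 32-byte memcpy on an
+// SSE2 subtarget with both pointers 16-byte aligned and a 16-byte aligned
+// stack returns MVT::v4i32, so the copy becomes two 16-byte vector
+// loads/stores; the same copy without those alignment guarantees on x86-64
+// falls back to MVT::i64.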
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned X86TargetLowering::getJumpTableEncoding() const {
+ // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
+ // symbol.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *
+X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid,MCContext &Ctx) const{
+ assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT());
+ // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
+ // entries.
+ return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
+}
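+// With this encoding each table entry ends up as something like
+//   .long .LBB0_3@GOTOFF
+// and the dispatch adds the PIC base register back in, so the table itself
+// contains no absolute addresses (assumed assembler syntax, for illustration).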
+
+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget->is64Bit())
+ // This doesn't have DebugLoc associated with it, but is not really the
+ // same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
+ return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *X86TargetLowering::
+getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
+ MCContext &Ctx) const {
+ // X86-64 uses RIP relative addressing based on the jump table label.
+ if (Subtarget->isPICStyleRIPRel())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ // Otherwise, the reference is relative to the PIC base.
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+}
+
+// FIXME: Why is this routine here? Move to RegInfo!
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::x86mmx:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
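+// Concretely, the stack protector then loads its canary with an instruction
+// such as "movq %fs:0x28, %rax" on 64-bit Linux or "movl %gs:0x14, %eax" on
+// i386 (illustrative assembly; address space 256 maps to %gs and 257 to %fs).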
+
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
+}
+
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ // Add the regs to the liveout set for the function.
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+
+ SDValue Flag;
+
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ if (ValVT == MVT::x86mmx) {
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
+ }
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+
+ // RAX now acts like a return value.
+ MRI.addLiveOut(X86::RAX);
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+
+ SDValue Val;
+
+ // If this is a call to a function that returns an fp value on the floating
+    // point stack, we must guarantee that the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpPOP_RETVAL instruction
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+ MVT::Other, MVT::Glue, Ops, 2), 1);
+ Val = Chain.getValue(0);
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
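+// For example, a caller of a function returning an f64 on an x87-only
+// subtarget sees the value in ST(0); FpPOP_RETVAL pops it even when the
+// result is unused, keeping the x87 register stack balanced (illustrative
+// scenario).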
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is the standard for many Windows API
+// routines. It differs from the C calling convention just a little: the
+// callee cleans up the stack rather than the caller, and symbols are also
+// decorated in some fancy way :) It doesn't support any vector arguments.
+// For info on the fast calling convention see the Fast Calling Convention
+// (tail call) implementation in LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a call uses struct return
+/// semantics.
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ if (Outs.empty())
+ return false;
+
+ return Outs[0].Flags.isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a function uses struct
+/// return semantics.
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ if (Ins.empty())
+ return false;
+
+ return Ins[0].Flags.isSRet();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
+/// FuncIsMadeTailCallSafe - Return true if the function is being made into
+/// a tailcall target by changing its ABI.
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+ return GuaranteedTailCallOpt && IsTailCallConvention(CC);
+}
+
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+ EVT ValVT;
+
+ // If the value is passed by pointer, the address is passed instead of the
+ // value itself.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ValVT = VA.getLocVT();
+ else
+ ValVT = VA.getValVT();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+ // In case of tail call optimization, mark all arguments mutable, since they
+ // could be overwritten by the lowering of arguments for a tail call.
+ if (Flags.isByVal()) {
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ return DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getLoad(ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+}
+
+SDValue
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
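+ // The Win64 ABI reserves a 32-byte shadow area (four 8-byte home slots for
+ // RCX, RDX, R8 and R9) ahead of any stack arguments; it must be allocated
+ // even when the callee takes fewer than four register arguments.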
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+
+ unsigned LastVal = ~0U;
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = NULL;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = X86::VR64RegisterClass;
+ else
+ llvm_unreachable("Unknown argument type!");
+
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::BCvt)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+
+ if (VA.isExtInLoc()) {
+ // Handle MMX values passed in XMM regs.
+ if (RegVT.isVector()) {
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
+ } else
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+ } else {
+ assert(VA.isMemLoc());
+ ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+ }
+
+ // If the value is passed via a pointer, do a load.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
+
+ InVals.push_back(ArgValue);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // Align stack specially for tail calls.
+ if (FuncIsMadeTailCallSafe(CallConv))
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ // If the function takes a variable number of arguments, make a frame index
+ // for the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
+
+ if (IsWin64) {
+ // The XMM registers which might contain vararg parameters are shadowed by
+ // their paired GPRs, so we only need to save the GPRs to their home
+ // slots.
+ TotalNumIntRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+
+ bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
+ "SSE register cannot be used when SSE is disabled!");
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ // SSE is disabled (e.g. soft-float or no-implicit-float, as in kernel
+ // code), so don't push the XMM registers on the stack.
+ TotalNumXMMRegs = 0;
+
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ false));
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(Offset));
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ // Now store the XMM (fp + vector) parameter registers.
+ SmallVector<SDValue, 11> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+ SaveXMMOps.push_back(ALVal);
+
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getRegSaveFrameIndex()));
+ SaveXMMOps.push_back(DAG.getIntPtrConstant(
+ FuncInfo->getVarArgsFPOffset()));
+
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+ SaveXMMOps.push_back(Val);
+ }
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+ MVT::Other,
+ &SaveXMMOps[0], SaveXMMOps.size()));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ // Some CCs need callee pop.
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else {
+ FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
+ FuncInfo->setBytesToPopOnReturn(4);
+ }
+
+ if (!Is64Bit) {
+ // RegSaveFrameIndex is X86-64 only.
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
+ // fastcall and thiscall functions can't have varargs.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ }
+
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal())
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr, SDValue Chain,
+ bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const {
+ // Adjust the Return address stack slot.
+ EVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ bool Is64Bit, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ false, false, 0);
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsStructRet = CallIsStructReturn(Outs);
+ bool IsSibcall = false;
+
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
+ if (!GuaranteedTailCallOpt && isTailCall)
+ IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
+ }
+
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (IsSibcall)
+ // This is a sibcall. The memory operands are available in the caller's
+ // own stack frame.
+ NumBytes = 0;
+ else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (isTailCall && !IsSibcall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the return-address stack slot, but only if
+ // the new delta is greater (in magnitude) than the previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ if (!IsSibcall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+ // Load return address for tail calls.
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ } else
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
+ break;
+ case CCValAssign::Indirect: {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ Arg = SpillSlot;
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && IsWin64) {
+ // The Win64 ABI requires an argument passed in an XMM reg to be copied to
+ // the corresponding shadow reg if the callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+ dl, DAG, VA, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (Subtarget->isPICStyleGOT()) {
+ // ELF / PIC requires the GOT pointer to be in the EBX register before
+ // function calls via the PLT.
+ if (!isTailCall) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ // If we are tail calling and generating PIC/GOT style code load the
+ // address of the callee into ECX. The value in ecx is used as target of
+ // the tail jump. This is done to circumvent the ebx/callee-saved problem
+ // for tail calls on PIC/GOT architectures. Normally we would just put the
+ // address of GOT into ebx and then call target@PLT. But for tail calls
+ // ebx would be restored (since ebx is callee saved) before jumping to the
+ // target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
+ }
+ }
+
+ if (Is64Bit && isVarArg && !IsWin64) {
+ // From the AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration), %al is used as a hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an upper bound on the number of SSE
+ // registers used, in the range 0 - 8 inclusive.
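+ // Illustration (not part of the lowering): for a call such as
+ //   printf("%f %f\n", x, y);   // x and y are doubles passed in XMM0/XMM1
+ // two XMM registers are allocated, so %al is set to 2 here (any upper
+ // bound up to 8 would also satisfy the ABI).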
+
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ if (GuaranteedTailCallOpt) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc())
+ continue;
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff, dl);
+ }
+
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
+ // We should use extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ const GlobalValue *GV = G->getGlobal();
+ if (!GV->hasDLLImportLinkage()) {
+ unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the Leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
+ }
+
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the Leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlags);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+
+ if (!IsSibcall && isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!isTailCall && Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !IsWin64)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (isTailCall) {
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns not tail calls. Consider a void
+ // function making a tail call to a function returning int.
+ return DAG.getNode(X86ISD::TC_RETURN, dl,
+ NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like StdCall, the callee cleans up the arguments, except that ECX is
+// reserved for storing the address of the tail-called function. Only 2
+// registers are free for argument passing (inreg). Tail call optimization is
+// performed provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On the X86_64 architecture with GOT-style position-independent code, only
+// local (within-module) calls are supported at the moment.
+// To keep the stack aligned according to the platform ABI, the function
+// GetAlignedArgumentStackSize ensures that the argument delta is always a
+// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
+// for example.)
+// If the tail-called callee has more arguments than the caller, the caller
+// needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original RETADDR, but before the saved frame pointer or the spilled
+// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
+// Stack layout (a worked FPDiff example follows the diagram):
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
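+// Worked example (illustration, 32-bit, 4-byte slots): if the caller's own
+// incoming arguments occupy 8 bytes and the tail-called callee needs 16 bytes
+// of arguments, FPDiff = 8 - 16 = -8, so an 8-byte move area is reserved and
+// the return address is re-stored there before the tail jump.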
+
+/// GetAlignedArgumentStackSize - Round the argument stack size so that, once
+/// the return-address slot is accounted for, the stack stays aligned; e.g.
+/// produce 16n + 12 bytes for a 16-byte alignment requirement with a 4-byte
+/// return-address slot.
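+/// Worked example (assuming StackAlignment = 16 and SlotSize = 4):
+///   StackSize = 20: 20 & 15 = 4  <= 12, so 20 + (12 - 4)        = 28 = 16*1 + 12
+///   StackSize = 30: 30 & 15 = 14 >  12, so (30 & ~15) + 16 + 12 = 44 = 16*2 + 12
+/// Both results are congruent to 12 mod 16, so pushing the 4-byte return
+/// address restores 16-byte alignment.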
+unsigned
+X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ uint64_t SlotSize = TD->getPointerSize();
+ if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
+ // The remainder is no greater than StackAlignment - SlotSize (e.g. 12), so
+ // just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out the lower bits, then add one full stack alignment plus
+ // StackAlignment - SlotSize (e.g. 12 bytes).
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
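+/// For illustration: if f(int a, int b) sibcalls g(a, b) and both arguments
+/// are forwarded unchanged, each outgoing Arg is just a load from (or, for
+/// byval, the frame index of) the caller's own fixed stack slot at the same
+/// offset, so no store into the outgoing argument area is needed.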
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const X86InstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ unsigned Opcode = Def->getOpcode();
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+ Def->getOperand(1).isFI()) {
+ FI = Def->getOperand(1).getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!IsTailCallConvention(CalleeCC) &&
+ CalleeCC != CallingConv::C)
+ return false;
+
+ // If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ if (GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // A stdcall caller is expected to clean up its arguments; a callee with a
+ // different calling convention isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+ // stack. If the result is unused, nothing will pop it, so it is not safe to
+ // optimize this into a sibcall.
+ bool Unused = false;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (!Ins[i].Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+ }
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
+ return false;
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ unsigned NumInRegs = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement) {
+ // Offset should fit into 32 bit immediate field.
+ if (!isInt<32>(Offset))
+ return false;
+
+ // If we don't have a symbolic displacement - we don't have any extra
+ // restrictions.
+ if (!hasSymbolicDisplacement)
+ return true;
+
+ // FIXME: Some tweaks might be needed for medium code model.
+ if (M != CodeModel::Small && M != CodeModel::Kernel)
+ return false;
+
+ // For the small code model, we assume that the last object ends at least
+ // 16MB before the 31-bit boundary. We may also accept fairly large negative
+ // constants, knowing that all objects are in the positive half of the
+ // address space.
+ if (M == CodeModel::Small && Offset < 16*1024*1024)
+ return true;
+
+ // For the kernel code model, we know that all objects reside in the negative
+ // half of the 32-bit address space. We may not accept negative offsets, since
+ // they may be just off the object, but we may accept fairly large positive
+ // ones.
+ if (M == CodeModel::Kernel && Offset > 0)
+ return true;
+
+ return false;
+}
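+// Examples: in the small code model an offset of 8*1024*1024 (8MB) with a
+// symbolic displacement is accepted, while 32*1024*1024 is not; in the kernel
+// code model a positive offset such as 4096 is accepted but -4096 is rejected.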
+
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit;
+ case CallingConv::X86_FastCall:
+ return !is64Bit;
+ case CallingConv::X86_ThisCall:
+ return !is64Bit;
+ case CallingConv::Fast:
+ return TailCallOpt;
+ case CallingConv::GHC:
+ return TailCallOpt;
+ }
+}
+
+/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
+/// X86-specific condition code, returning the condition code and the LHS/RHS
+/// of the comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ case ISD::SETOEQ:
+ case ISD::SETUNE: return X86::COND_INVALID;
+ }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. The current x86 ISA includes the following FP cmov instructions:
+/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
+ if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ return true;
+ }
+ return false;
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified half-open range [Low, Hi).
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ if (Val < 0 || Val == CmpVal)
+ return true;
+ return false;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
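+// Example: for v4i32, the mask <2,1,0,3> is a valid PSHUFD mask (every index
+// refers to the first operand), whereas <0,4,1,5> is not, since indices >= 4
+// reference the second operand.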
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Lower quadword copied in order or undef.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Upper quadword shuffled.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ return false;
+
+ return true;
+}
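+// Example: for v8i16, <0,1,2,3,7,6,5,4> is a valid PSHUFHW mask (lower
+// quadword in order, upper quadword permuted within 4..7), while
+// <1,0,2,3,4,5,6,7> is not, since it permutes the lower quadword.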
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Upper quadword copied in order.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Lower quadword shuffled.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 4)
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFLWMask(M, N->getValueType(0));
+}
+
+/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PALIGNR.
+static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool hasSSSE3) {
+ int i, e = VT.getVectorNumElements();
+
+ // Do not handle v2i64 / v2f64 shuffles with palignr.
+ if (e < 4 || !hasSSSE3)
+ return false;
+
+ for (i = 0; i != e; ++i)
+ if (Mask[i] >= 0)
+ break;
+
+ // All undef, not a palignr.
+ if (i == e)
+ return false;
+
+ // Determine if it's ok to perform a palignr with only the LHS, since we
+ // don't have access to the actual shuffle elements to see if RHS is undef.
+ bool Unary = Mask[i] < (int)e;
+ bool NeedsUnary = false;
+
+ int s = Mask[i] - i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != e; ++i) {
+ int m = Mask[i];
+ if (m < 0)
+ continue;
+
+ Unary = Unary && (m < (int)e);
+ NeedsUnary = NeedsUnary || (m < s);
+
+ if (NeedsUnary && !Unary)
+ return false;
+ if (Unary && m != ((s+i) & (e-1)))
+ return false;
+ if (!Unary && m != (s+i))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPALIGNRMask(M, N->getValueType(0), true);
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isSHUFPMask(M, N->getValueType(0));
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles require the lower
+/// half elements to come from vector 1 (which would equal the dest.) and
+/// the upper half to come from vector 2.
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedSHUFPMask(M, N->getValueType(0));
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getMaskElt(0), 6) &&
+ isUndefOrEqual(N->getMaskElt(1), 7) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ return false;
+
+ return true;
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j + NumElts/2))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
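+ // Illustrative example: for a 256-bit v8i32 the accepted mask is
+ // <0,0,1,1,4,4,5,5>, again treating each 128-bit half independently.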
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ if (VT.getVectorElementType().getSizeInBits() < 32)
+ return false;
+
+ int NumElts = VT.getVectorNumElements();
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (int i = 1; i < NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
+/// x86 movss wants: movss requires the lowest element to be the lowest element
+/// of vector 2 and the other elements to come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ int NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (int i = 1; i < NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 1, 1, 3, 3
+ for (unsigned i = 0; i < 2; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 1)
+ return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 3)
+ return false;
+ if (Elt == 3)
+ HasHi = true;
+ }
+ // Don't use movshdup if it can be done with a shufps.
+ // FIXME: verify that matching u, u, 3, 3 is what we want.
+ return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 0, 0, 2, 2
+ for (unsigned i = 0; i < 2; ++i)
+ if (N->getMaskElt(i) > 0)
+ return false;
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 2)
+ return false;
+ if (Elt == 2)
+ HasHi = true;
+ }
+ // Don't use movsldup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
+ int e = N->getValueType(0).getVectorNumElements() / 2;
+
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ return false;
+ return true;
+}
+
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
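+/// For illustration (derived from the loop below): a 4-element mask <3,2,1,0>
+/// yields 0x1B (0b00011011); undef elements encode as 0 and indices into the
+/// second operand are reduced modulo the element count.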
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (int i = 0; i < NumOperands; ++i) {
+ int Val = SVOp->getMaskElt(NumOperands-i-1);
+ if (Val < 0) Val = 0;
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
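+/// For illustration: each of mask elements 4-7 contributes a 2-bit field
+/// (value - 4), so the identity mask <u,u,u,u,4,5,6,7> encodes as 0xE4, with
+/// undef elements contributing 0.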
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
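+/// For illustration: the byte rotation is the first defined mask element,
+/// adjusted by its position, times the element size; e.g. a v8i16 mask whose
+/// first defined element is 3 (at position 0) gives (3 - 0) * 2 = 6 bytes.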
+unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VVT = N->getValueType(0);
+ unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+ int Val = 0;
+
+ unsigned i, e;
+ for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ break;
+ }
+ return (Val - i) * EltSize;
+}
+
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
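+/// For illustration: extracting a 128-bit subvector at element index 4 from a
+/// v8f32 gives NumElemsPerChunk = 128 / 32 = 4 and an immediate of 4 / 4 = 1,
+/// i.e. the upper 128-bit lane.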
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->isNullValue()) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
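+/// For illustration: with 4 elements, the mask <0,1,4,5> becomes <4,5,0,1>
+/// after the operands are swapped; undef (negative) entries stay unchanged.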
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = SVOp->getMaskElt(i);
+ if (idx < 0)
+ MaskVec.push_back(idx);
+ else if (idx < (int)NumElems)
+ MaskVec.push_back(idx + NumElems);
+ else
+ MaskVec.push_back(idx - NumElems);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+ if (Op->getValueType(0).getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ShuffleVectorSDNode *Op) {
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // If V2 is a vector load, don't do this transformation. We will try to use
+ // a load-folded shufps instead.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build SSE zero vectors as <4 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.getSizeInBits() == 128) { // SSE
+ if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point; there is no support for integer ops. Default
+ // to emitting fp zero vectors in that case.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
+
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
+ SDValue Vec;
+ if (VT.is256BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+}
+
+/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
+/// that point to V2 point to its first element.
+static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> MaskVec;
+ SVOp->getMask(MaskVec);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (MaskVec[i] > (int)NumElems) {
+ MaskVec[i] = NumElems;
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
+ SVOp->getOperand(1), &MaskVec[0]);
+ return SDValue(SVOp, 0);
+}
+
+/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
+/// operation of the specified width.
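+/// For illustration: with 4 elements the mask built below is <4,1,2,3>, i.e.
+/// element 0 is taken from V2 and the remaining elements from V1 in order.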
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
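+/// For illustration: with 4 elements the mask built below is <0,4,1,5>.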
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
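+/// For illustration: with 4 elements the mask built below is <2,6,3,7>.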
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Half = NumElems/2;
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT PVT = MVT::v4f32;
+ EVT VT = SV->getValueType(0);
+ DebugLoc dl = SV->getDebugLoc();
+ SDValue V1 = SV->getOperand(0);
+ int NumElems = VT.getVectorNumElements();
+ int EltNo = SV->getSplatIndex();
+
+ // unpack elements to the correct location
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ } else {
+ V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+
+ // Perform the splat.
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
+ V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V1);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
+ EVT VT = V2.getValueType();
+ SDValue V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(VT, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // Index 0 always comes from the first element of the second source;
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the remaining positions of the first source vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ default:
+ assert("not implemented for target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
+ }
+
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BITCAST) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation that consecutively come from a zero (or undef). The
+/// search can start from either end of the vector (left or right).
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
+ }
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+ // Ignore undef indices
+ if (Idx < 0)
+ continue;
+
+ if (Idx < NumElems)
+ SeenV1 = true;
+ else
+ SeenV2 = true;
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
+ return false;
+ }
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = true;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
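+/// For illustration (matches the loop below): adjacent bytes are packed into
+/// 16-bit lanes, e.g. nonzero bytes b0 and b1 end up in lane 0 of a v8i16 as
+/// (zext(b1) << 8) | zext(b0), and the v8i16 is bitcast back to v16i8 at the end.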
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ EVT ShVT = MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(SrcOp.getValueType()))));
+}
+
+SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. FIXME: It's possible to compute
+ // the exact stack offset and reference FI + adjusted offset instead,
+ // if someone *really* cares about this. That's the way to implement it.
+ return SDValue();
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
+ false, false, 0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+ }
+
+ return SDValue();
+}
+
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &DL, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+
+ unsigned NumElems = Op.getNumOperands();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and
+ // use shuffles to put them in place.
+ if (VT.getSizeInBits() == 256 &&
+ Subtarget->hasAVX() &&
+ !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ V[i] = Op.getOperand(i);
+ }
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build the lower subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ // Build the upper subvector.
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ return ConcatVectors(Lower, Upper, DAG);
+ }
+
+ // All zeros:
+ // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+ // All ones:
+ // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to <4 x i32> (SSE) or <8 x i32> (AVX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
+ return Op;
+
+ if (ISD::isBuildVectorAllOnes(Op.getNode()))
+ return getOnesVector(Op.getValueType(), DAG, dl);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ }
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (X86::isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
+ return DAG.getUNDEF(VT);
+
+ // Special case for single non-zero, non-undef, element.
+ if (NumNonZero == 1) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item,
+ DAG.getUNDEF(Item.getValueType()),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0) {
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+ (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ DAG);
+ } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ }
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
+ return SDValue();
+ }
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ SmallVector<int, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i : i);
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use insertps to insert each element into the result in turn.
+ if (getSubtarget()->hasSSE41()) {
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ }
+ return Result;
+ }
+
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
+ }
+ return V[0];
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ // We support concatenating two MMX registers and placing the result in
+ // an XMM register. This is better than doing a stack convert.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+ assert(Op.getNumOperands() == 2);
+ assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
+ ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
+ int Mask[2];
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ InVec = Op.getOperand(1);
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ unsigned NumElts = ResVT.getVectorNumElements();
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
+ InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
+ } else {
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
+ SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ Mask[0] = 0; Mask[1] = 2;
+ VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
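+ // Illustrative example: for the mask <0,1,2,3,8,9,10,11> every low word comes
+ // from input quad 0 (V1's low quadword) and every high word from input quad 2
+ // (V2's low quadword), so BestLoQuad ends up 0 and BestHiQuad ends up 2.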
+ SmallVector<unsigned, 4> LoQuad(4);
+ SmallVector<unsigned, 4> HiQuad(4);
+ BitVector InputQuads(4);
+ for (unsigned i = 0; i < 8; ++i) {
+ SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, if all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If there are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (Subtarget->hasSSSE3()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads.find_first();
+ BestHiQuad = InputQuads.find_next(BestLoQuad);
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad is set, shuffle the quads together and update
+ // the shuffle mask. A best-quad value of -1 means that half of the result
+ // contains words from all 4 input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
+ MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
+ }
+ }
+
+ // If we have SSSE3, and all words of the result are from 1 input vector,
+ // case 2 is generated, otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (Subtarget->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
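+ // Illustrative example: a word index of 3 becomes the byte selectors 6 and 7,
+ // while a word taken from the other input becomes the pair 0x80, 0x80 so those
+ // result bytes are zeroed here and filled in by the other shuffled input.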
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (TwoInputs && (EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with new element order.
+ BitVector InOrder(8);
+ if (BestLoQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV.push_back(idx & 3);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ MaskV.push_back(i);
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskV.push_back(i);
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV.push_back((idx & 3) + 4);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
+ }
+
+ // In case BestHiQuad and BestLoQuad were both -1, which means each quadword has a word
+ // from each of the four input quadwords, calculate the InOrder bitvector now
+ // before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 16> MaskVals;
+ SVOp->getMask(MaskVals);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+ // FIXME: kill V2Only once shuffles are canonicalized by getNode.
+ bool V1Only = true;
+ bool V2Only = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ if (EltIdx < 16)
+ V2Only = false;
+ else
+ V1Only = false;
+ }
+
+ // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
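+ // Illustrative example: with mask <0, 16, -1, ...>, V1's pshufb mask begins
+ // 0x00,0x80,0x80,... and V2's begins 0x80,0x00,0x80,...; the OR merges them
+ // and the undef lane becomes zero.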
+ bool TwoInputs = !(V1Only || V2Only);
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ // If all the elements are from V2, assign it to V1 and return after
+ // building the first pshufb.
+ if (V2Only)
+ V1 = V2;
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate the in-place words and then fix all out-of-place words
+ // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+ SDValue NewV = V2Only ? V2 : V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+ if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined, are consecutive, and can be loaded
+ // together using a single extract, extract the word and insert it.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits;
+ // otherwise clear the bottom 8 bits if we need to do an OR.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements points to elements in
+/// the right sequence. e.g.
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
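+/// In that (v8i16) example the mask is rewritten as <1, 5, 0, 7> on v4i32,
+/// with the operands bitcast to the narrower type.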
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ EVT NewVT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ }
+
+ int Scale = NumElems / NewWidth;
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (int j = 0; j < Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx == -1)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDValue();
+ }
+ if (StartIdx == -1)
+ MaskVec.push_back(-1);
+ else
+ MaskVec.push_back(StartIdx / Scale);
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
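+/// (X86ISD::VZEXT_MOVL copies the low element of the source into the result
+/// and zeroes the remaining elements.)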
+static SDValue getVZextMovL(EVT VT, EVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
+/// shuffles.
+static SDValue
+LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.resize(4);
+ SmallVector<int, 8> Mask1(4U, -1);
+ SmallVector<int, 8> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // No more than two elements come from either vector, so this can be
+ // implemented with two shuffles. The first shuffle gathers the elements.
+ // The second shuffle, which takes the first shuffle as both of its
+ // vector operands, puts the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ SmallVector<int, 8> Mask2(4U, -1);
+
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ } else if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, VT);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ } else {
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ Locs.clear();
+ Locs.resize(4);
+ SmallVector<int,8> LoMask(4U, -1);
+ SmallVector<int,8> HiMask(4U, -1);
+
+ SmallVector<int,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ SmallVector<int, 8> MaskOps;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(-1);
+ } else {
+ unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+ MaskOps.push_back(Idx);
+ }
+ }
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
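+// MayFoldVectorLoad - Return true if V looks like a vector load (possibly
+// behind a single-use bitcast and/or scalar_to_vector) that may be folded
+// into the shuffle's memory operand.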
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (MayFoldLoad(V))
+ return true;
+ return false;
+}
+
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug gets fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanXFormVExtractWithShuffleIntoLoad - Check if the current shuffle is used
+/// by a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization is not done.
+/// FIXME: This is probably not the best approach, but it fixes the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BITCAST) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ // Select the input vector, guarding against an out of range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+ // FIXME: avoid the multi-use bug that is preventing lots of
+ // foldings from being detected; this is still wrong of course, but
+ // it gives the temporarily desired behavior, and if it happens that
+ // the load really has more uses, it will not fold during isel and
+ // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonicalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+ // potential load folding here; otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ // Both of them can't be memory operations though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ // movl and movlp will both match v2i64, but v2i64 is never matched by
+ // movl earlier because we make it strict to avoid messing with the movlp load
+ // folding logic (see the code above the getMOVLP call). So match it here;
+ // this is horrible, but it will stay like this until we move all shuffle
+ // matching to x86 specific nodes. Note that for the 1st condition all
+ // types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+ case MVT::v2f64:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknown type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknown type for unpckh");
+ }
+ return 0;
+}
+
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+
+ // Handle splat operations
+ if (SVOp->isSplat()) {
+ // Special case, this is the only place now where it's
+ // allowed to return a vector_shuffle operation without
+ // using a target specific node, because *hopefully* it
+ // will be optimized away by the dag combiner.
+ if (VT.getVectorNumElements() <= 4 &&
+ CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+ return Op;
+
+ // Handle splats by matching through known masks
+ if (VT.getVectorNumElements() <= 4)
+ return SDValue();
+
+ // Canonicalize all of the remaining to v4f32.
+ return PromoteSplat(SVOp, DAG);
+ }
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode()) {
+ if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ // Shuffle operations on MMX not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general idea is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
+
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+ RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Used to match splats
+ if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+ // The actual implementation will match the mask in the if above and then
+ // during isel it can match several different instructions, not only pshufd
+ // as its name says. Sad but true; emulate the behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+ TargetMask, DAG);
+
+ if (VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = getSubtarget()->hasSSE2() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (X86::isMOVLMask(SVOp)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!X86::isMOVLPMask(SVOp)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
+ }
+
+ // FIXME: fold these into legal mask.
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+
+ if (ShouldXformToMOVHLPS(SVOp) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ EVT EltVT = VT.getVectorElementType();
+ ShAmt *= EltVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(SVOp, DAG);
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ // Shuffling low element of v1 into undef, just return v1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (X86::isUNPCKLMask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (V2IsSplat) {
+ // Normalize the mask so all entries that point to V2 point to its first
+ // element, then try to match unpck{h|l} again. If there is a match, return
+ // a new vector_shuffle with the corrected mask.
+ SDValue NewMask = NormalizeMask(SVOp, DAG);
+ ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+ if (NSVOp != SVOp) {
+ if (X86::isUNPCKLMask(NSVOp, true)) {
+ return NewMask;
+ } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ return NewMask;
+ }
+ }
+ }
+
+ if (Commuted) {
+ // Commute it back and try unpck* again.
+ // FIXME: this seems wrong.
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+ ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ }
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+ // nodes, and remove one by one until they don't return Op anymore.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ if (isPALIGNRMask(M, VT, HasSSSE3))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
+
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64) {
+ X86ISD::NodeType Opcode =
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+ }
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
+
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4)
+ return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BITCAST ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
+ } else if (VT == MVT::i32) {
+ // ExtractPS works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit
+ // vector and then extract from the 128-bit vector.
+ if (VecVT.getSizeInBits() > 128) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= ExtractNumElems;
+ Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+ // Extract from it.
+ SDValue ScaledIdx = Idx;
+ if (Upper)
+ ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(ExtractNumElems,
+ Idx.getValueType()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ ScaledIdx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
+ if (Subtarget->hasSSE41()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+ // Transform it so it matches pextrw, which produces a 32-bit result.
+ EVT EltVT = MVT::i32;
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { Idx, -1, -1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ } else if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ EVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v16i8)
+ Opc = X86ISD::PINSRB;
+ else
+ Opc = X86ISD::PINSRB;
+
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
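+ // For example, inserting into destination lane 2 with no zero mask yields
+ // the immediate 2 << 4 == 0x20.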
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ // Create this as a scalar to vector.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (VT.getSizeInBits() > 128) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the 128-bit vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems / 2;
+
+ SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+ // Insert into it.
+ SDValue ScaledN2 = N2;
+ if (Upper)
+ ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+ DAG.getConstant(NumElems /
+ (VT.getSizeInBits() / 128),
+ N2.getValueType()));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+ N1, ScaledN2);
+
+ // Insert the 128-bit vector
+ // FIXME: Why UNDEF?
+ return Insert128BitVector(N0, Op, N2, DAG, dl);
+ }
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EltVT == MVT::i8)
+ return SDValue();
+
+ if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
+ // as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+ && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment(),
+ CP->getOffset(), OpFlag);
+ DebugLoc DL = CP->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ OpFlag);
+ DebugLoc DL = JT->getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (OpFlag)
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = X86ISD::Wrapper;
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ WrapperKind = X86ISD::WrapperRIP;
+ else if (Subtarget->isPICStyleGOT())
+ OpFlag = X86II::MO_GOTOFF;
+ else if (Subtarget->isPICStyleStubPIC())
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+
+ DebugLoc DL = Op.getDebugLoc();
+ Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit()) {
+ Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ // Create the TargetBlockAddressAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset,
+ SelectionDAG &DAG) const {
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ SDValue Result;
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ // A direct static reference to a global.
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Offset = 0;
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
+
+ if (Subtarget->isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ // For globals that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlags))
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
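+// GetTLSADDR - Emit the X86ISD::TLSADDR pseudo for the given global (with the
+// given operand flags) and return the value that the TLS runtime call leaves
+// in ReturnReg.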
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(),
+ OperandFlags);
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ }
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
+ MFI->setAdjustsStack(true);
+
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+ X86::RAX, X86II::MO_TLSGD);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const EVT PtrVT, TLSModel::Model model,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
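+ // (In the x86 backend, address space 256 selects %gs and 257 selects %fs.)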
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
+
+ unsigned char OperandFlags = 0;
+ // Most TLS accesses are not RIP relative, even on x86-64. One exception is
+ // initial exec.
+ unsigned WrapperKind = X86ISD::Wrapper;
+ if (model == TLSModel::LocalExec) {
+ OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+ } else if (is64Bit) {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_GOTTPOFF;
+ WrapperKind = X86ISD::WrapperRIP;
+ } else {
+ assert(model == TLSModel::InitialExec);
+ OperandFlags = X86II::MO_INDNTPOFF;
+ }
+
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
+ SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec)
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GA->getGlobal();
+
+ if (Subtarget->isTargetELF()) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec"model for pic executables
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+
+ TLSModel::Model model
+ = getTLSModel(GV, getTargetMachine().getRelocationModel());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit());
+ }
+ } else if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ GA->getValueType(0),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine ISD node will make sure everything ends up in the
+ // right location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
+ // TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ }
+
+ llvm_unreachable("TLS not implemented for this target.");
+ return SDValue();
+}
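A minimal C++ illustration (not part of the patch; which model applies depends
on the relocation model and target) of the kind of source that reaches this
lowering: a thread_local access becomes an ISD::GlobalTLSAddress node, and
under the exec models it is lowered to the ThreadPointer-plus-Offset ADD built
above, with the thread pointer read from %gs:0 (32-bit) or %fs:0 (64-bit).

    #include <cstdio>

    // A plain thread-local variable; its address is what the TLS lowering
    // above computes, using whichever model the target picks.
    thread_local int tls_counter = 0;

    int bump() { return ++tls_counter; }

    int main() { std::printf("%d\n", bump()); }  // prints 1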
+
+
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8))
+ : DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
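As an illustration only (names invented for the sketch, not part of the patch),
a scalar C++ model of what the SHL_PARTS lowering above computes: Tmp2 is the
SHLD of the two halves, Tmp3 the plain SHL of the low half, and the CMOV keyed
on bit 5 of the shift amount (ShAmt & VTBits) picks between them when the
amount is 32 or more.

    #include <cstdint>
    #include <cstdio>

    // Scalar model of SHL_PARTS on a 64-bit value split into 32-bit halves.
    void shlParts(uint32_t lo, uint32_t hi, unsigned amt,
                  uint32_t &outLo, uint32_t &outHi) {
      unsigned s = amt & 31;                                  // hardware masks the count
      uint32_t shld = s ? (hi << s) | (lo >> (32 - s)) : hi;  // "Tmp2"
      uint32_t shl  = lo << s;                                // "Tmp3"
      if (amt & 32) { outHi = shl;  outLo = 0;   }            // CMOV path: amount >= 32
      else          { outHi = shld; outLo = shl; }
    }

    int main() {
      uint32_t lo, hi;
      shlParts(0x80000001u, 0x00000000u, 1, lo, hi);
      std::printf("%08x:%08x\n", hi, lo);  // 00000001:00000002
    }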
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector())
+ return SDValue();
+
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) const {
+ // Build the FILD
+ DebugLoc DL = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
+ SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When the stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {
+ Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
+ };
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
+ SelectionDAG &DAG) const {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode with the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double)
+ return sd;
+ }
+ */
+
+ DebugLoc dl = Op.getDebugLoc();
+ LLVMContext *Context = DAG.getContext();
+
+ // Build some magic constants.
+ std::vector<Constant*> CV0;
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ std::vector<Constant*> CV1;
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+ // Add the halves; easiest way is to swap them into another reg first.
+ int ShufMask[2] = { 1, -1 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+ DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
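The C sketch in the comment uses SSE intrinsics; purely as a cross-check (not
part of the patch), the same bias trick can be written with scalar code: drop
each 32-bit half into the mantissa of a double whose exponent encodes 2^84 or
2^52, subtract those biases, and add the two halves.

    #include <cstdint>
    #include <cstring>
    #include <cmath>
    #include <cstdio>

    static double bitsToDouble(uint64_t bits) {  // reinterpret raw IEEE-754 bits
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    double uint64ToDouble(uint64_t v) {
      uint32_t hi = uint32_t(v >> 32), lo = uint32_t(v);
      // 0x45300000 and 0x43300000 are the high words of 2^84 and 2^52.
      double dHi = bitsToDouble((uint64_t(0x45300000) << 32) | hi) - std::ldexp(1.0, 84);
      double dLo = bitsToDouble((uint64_t(0x43300000) << 32) | lo) - std::ldexp(1.0, 52);
      return dHi + dLo;  // the only step that rounds
    }

    int main() {
      std::printf("%.17g\n", uint64ToDouble(~0ULL));  // ~1.8446744073709552e+19
    }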
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ EVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+
+ // Handle final rounding.
+ return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ EVT SrcVT = N0.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ return LowerUINT_TO_FP_i32(Op, DAG);
+
+ // Make a 64-bit buffer, and use it to build an FILD.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+ // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
+}
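A scalar model (illustrative only; the helper name is invented) of the i64
path above: load the value as a signed integer the way FILD does, then add
2^64 back if the sign bit was set; the 0x5F800000 constant in the code is
2^64 encoded as an f32.

    #include <cstdint>
    #include <cstdio>

    long double uint64ViaSignedLoad(uint64_t v) {
      long double d = (long double)(int64_t)v;  // what FILD of the 64-bit slot yields
      if ((int64_t)v < 0)
        d += 18446744073709551616.0L;           // add the 2^64 fudge, in x87 precision
      return d;
    }

    int main() {
      std::printf("%.0Lf\n", uint64ViaSignedLoad(~0ULL));  // 18446744073709551615
    }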
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
+ DebugLoc DL = Op.getDebugLoc();
+
+ EVT DstTy = Op.getValueType();
+
+ if (!IsSigned) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(TheVT)
+ };
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return SDValue();
+
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op,
+ SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ if (VT.isVector()) {
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ } else {
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op1.getValueType();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
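For reference only (a scalar restatement, not the patch's SSE form), the same
mask-and-or copysign: clear the sign bit of the first operand, isolate the
sign bit of the second, and OR the two. The (1ULL << 63) and ~(1ULL << 63)
masks are the same constants the code above places in the constant pool.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    double copySignViaMasks(double mag, double sgn) {
      uint64_t m, s;
      std::memcpy(&m, &mag, sizeof m);
      std::memcpy(&s, &sgn, sizeof s);
      uint64_t bits = (m & ~(1ULL << 63))    // clear the first operand's sign bit
                    | (s &  (1ULL << 63));   // keep only the second operand's sign bit
      double out;
      std::memcpy(&out, &bits, sizeof out);
      return out;
    }

    int main() { std::printf("%g\n", copySignViaMasks(3.5, -0.0)); }  // -3.5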
+
+SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none of our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary result of the 'and' isn't used, don't bother using
+ // X86ISD::AND, because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
+ break;
+ }
+ }
+
+ if (!NonFlagUse)
+ break;
+ }
+ // FALL THROUGH
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
+ }
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+}
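A hypothetical source pattern (not from the patch; whether a given compiler
keeps the add at all depends on earlier IR canonicalization) of the DAG-level
shape this routine targets: a value produced by adding or subtracting one that
is immediately compared against zero, so the comparison can reuse the EFLAGS
set by INC/DEC instead of a separate TEST, subject to the use restrictions in
the comments above.

    #include <cstdio>

    // Comparisons of an increment/decrement result against zero are the
    // pattern EmitTest can turn into INC/DEC with a reused flags output.
    bool incrementsToZero(int x) { return x + 1 == 0; }
    bool decrementsToZero(int x) { return x - 1 == 0; }

    int main() {
      std::printf("%d %d\n", incrementsToZero(-1), decrementsToZero(1));  // 1 1
    }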
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ SDValue LHS, RHS;
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
+ }
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ SDValue AndLHS = Op0;
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (LHS.getValueType() == MVT::i8 ||
+ LHS.getValueType() == MVT::i16)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
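Hypothetical C++ forms (not from the patch) of the patterns listed in the
comment above; on typical x86 these are the setcc shapes LowerToBT converts
into a BT followed by SETAE or SETB.

    #include <cstdio>

    bool bitClear(unsigned x, unsigned n) { return (x & (1u << n)) == 0; }  // BT + SETAE
    bool bitSet(unsigned x, unsigned n)   { return ((x >> n) & 1u) != 0; }  // BT + SETB

    int main() {
      std::printf("%d %d\n", bitClear(4u, 1), bitSet(4u, 2));  // prints 1 1
    }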
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+ unsigned SSECC = 8;
+ EVT VT0 = Op0.getValueType();
+ assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
+ unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ bool Swap = false;
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDValue UNORD, EQ;
+ UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDValue ORD, NEQ;
+ ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ }
+ llvm_unreachable("Illegal FP comparison");
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integers, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+ case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
+ if (FlipSigns) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
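A scalar illustration (not part of the patch) of the sign-flip used above:
XORing both operands with the sign bit turns an unsigned comparison into a
signed one, which is all that SSE's PCMPGT family provides.

    #include <cstdint>
    #include <cstdio>

    bool unsignedGtViaSignedCompare(uint32_t a, uint32_t b) {
      int32_t sa = int32_t(a ^ 0x80000000u);  // flip the sign bit of each operand
      int32_t sb = int32_t(b ^ 0x80000000u);
      return sa > sb;                         // equals (a > b) as unsigned
    }

    int main() {
      std::printf("%d %d\n",
                  unsignedGtViaSignedCompare(0xFFFFFFFFu, 1u),   // 1
                  unsignedGtViaSignedCompare(1u, 2u));           // 0
    }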
+
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC ||
+ Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR ||
+ Opc == X86ISD::AND))
+ return true;
+
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
+ return false;
+}
+
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ if (Cond.getOpcode() == X86ISD::SETCC &&
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
+ SDValue Cmp = Cond.getOperand(1);
+
+ unsigned CondCode = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
+ }
+ }
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If the condition flag is set by an X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ EVT VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = { Op2, Op1, CC, Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+}
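A scalar restatement (illustrative only, not the patch itself) of the
(select (x == 0), -1, y) -> (sign_bit (x - 1)) | y identity handled above:
comparing x against 1 sets the carry flag exactly when x is 0, and
SETCC_CARRY (an sbb of a register with itself) broadcasts that carry into 0
or all-ones, which is then ORed with y.

    #include <cstdint>
    #include <cstdio>

    uint32_t selectMinusOneIfZero(uint32_t x, uint32_t y) {
      // What SETCC_CARRY materializes: all-ones iff the compare x < 1 carried,
      // i.e. iff x == 0.
      uint32_t allOnesIfZero = (x == 0) ? 0xFFFFFFFFu : 0u;
      return allOnesIfZero | y;               // (select (x == 0), -1, y)
    }

    int main() {
      std::printf("%08x %08x\n",
                  selectMinusOneIfZero(0u, 5u),   // ffffffff
                  selectMinusOneIfZero(3u, 5u));  // 00000005
    }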
+
+// isAndOrOfSetCCs - Return true if node is an ISD::AND or ISD::OR of two
+// X86ISD::SETCC nodes, each of which has no other use apart from the AND/OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of an X86ISD::SETCC and
+// 1 and that the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
+ // If the condition flag is set by an X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+ // Recognize xorb (setcc), 1 patterns. The xor inverts the condition. It
+ // should be transformed by the DAG combiner except when the condition is
+ // set by an arithmetic-with-overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to an _alloca call for Windows, Cygwin and
+// MinGW targets. Calls to _alloca are needed to probe the stack when
+// allocating more than 4K bytes in one go. Touching the stack at 4K increments
+// is necessary to ensure that the guard pages used by the OS virtual memory
+// manager are allocated in the correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
+ assert(!Subtarget->isTargetEnvMacho());
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
+
+ SDValue Flag;
+
+ EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (points to parameters passed in memory).
+ // reg_save_area
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ MVT::i32),
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
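For reference (an annotation, not part of the patch), the structure being
filled in above is the x86-64 SysV __va_list_tag; the four stores at offsets
0, 4, 8 and 16 correspond to its fields, matching the "{ i32, i32, i8*, i8* }"
noted in LowerVACOPY below.

    // x86-64 SysV va_list element as described by the comments above.
    struct VaListTag {
      unsigned int gp_offset;         // offset 0:  0 .. 6*8, into reg_save_area (GPRs)
      unsigned int fp_offset;         // offset 4:  48 .. 48 + 8*16 (XMM registers)
      void        *overflow_arg_area; // offset 8:  parameters passed on the stack
      void        *reg_save_area;     // offset 16: spilled register-argument area
    };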
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasXMM());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
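+    // COMI/UCOMI compare the lowest elements of the two operands and set
+    // EFLAGS; SETCC then materializes the requested condition as an i8 which
+    // is zero-extended to the i32 the intrinsic is defined to return.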
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+  // ptest and testp intrinsics. The intrinsics these come from are designed to
+  // return an integer value, not just an instruction, so lower it to the ptest
+  // or testp pattern and a setcc for the result.
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
+ unsigned X86CC = 0;
+ switch (IntNo) {
+ default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
+ // ZF = 1
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
+ // CF = 1
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ // ZF and CF = 0
+ X86CC = X86::COND_A;
+ break;
+ }
+
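+    // COND_E tests ZF, COND_B tests CF, and COND_A tests CF==0 && ZF==0,
+    // mirroring the flag definitions of the ptest/vtest variants above.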
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDValue();
+
+ unsigned NewIntNo = 0;
+ EVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+ break;
+ }
+ }
+
+    // The vector shift intrinsics with scalar shift amounts use a 32-bit value,
+    // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+    // to zero.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ if (ShAmtVT == MVT::v4i32) {
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
+ } else {
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+      // FIXME this must be lowered to get rid of the invalid type.
+ }
+
+ EVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
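+  // For Depth > 0, compute the frame pointer of the requested frame via
+  // LowerFRAMEADDR and load the return address, which sits one pointer-size
+  // slot above the saved frame pointer.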
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(),
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) const {
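+  // The incoming argument area starts two pointer-size slots above the frame
+  // pointer: past the saved frame pointer and the saved return address.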
+ return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
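+  // Store the handler's address into the parent frame's return-address slot
+  // (frame pointer + pointer size), adjusted by Offset, and hand that slot's
+  // address to EH_RETURN in ECX/RCX.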
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+ MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
+
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
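+    // The trampoline written below is, byte for byte:
+    //    0: 49 BB <fptr:8>   movabsq $fptr, %r11
+    //   10: 49 BA <nest:8>   movabsq $nest, %r10
+    //   20: 49 FF E3         jmpq *%r11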
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
+ false, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
+ false, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ CallingConv::ID CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ const FunctionType *FTy = Func->getFunctionType();
+ const AttrListPtr &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
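+    // The 10-byte trampoline being written is:
+    //   0: B8+r <nest:4>   movl $nest, %eax/%ecx
+    //   5: E9 <disp:4>     jmp fptr, disp relative to the end of the jmp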
+ // This is storing the opcode for MOV32ri.
+ const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
+ false, false, 1);
+
+ const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
+ false, false, 1);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ /*
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ */
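+  // For example, FPSR RC = 01 (round down): bit 11 = 0, bit 10 = 1, so
+  // ((0 | (1 << 1)) + 1) & 3 = 3, which FLT_ROUNDS defines as round to -inf.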
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+    // Zero extend to i32 since there is no i8 bsr instruction.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
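+  // Selecting 2*NumBits-1 for the zero case makes the final XOR with NumBits-1
+  // below produce NumBits, since (2*NumBits-1) ^ (NumBits-1) == NumBits when
+  // NumBits is a power of two.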
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits+NumBits-1, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SDValue Ops[] = {
+ Op,
+ DAG.getConstant(NumBits, OpVT),
+ DAG.getConstant(X86::COND_E, MVT::i8),
+ Op.getValue(1)
+ };
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
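+  //
+  // i.e. A*B mod 2^64 = Alo*Blo + ((Alo*Bhi + Ahi*Blo) << 32); the Ahi*Bhi
+  // partial product is shifted out of the low 64 bits entirely.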
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ return Res;
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ LLVMContext *Context = DAG.getContext();
+
+ // Must have SSE2.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Lower SHL with variable shift amount.
+ // Cannot lower SHL without SSE2 or later.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
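+    // Build 2^amt per lane: shift the amount into the float exponent field
+    // (<< 23), add the exponent bias 0x3f800000 (1.0f), bitcast to v4f32 and
+    // convert back to integer, then multiply R by the result.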
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
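+    // Only the low three bits of a byte shift amount matter, so move them up
+    // to the sign-bit position (a << 5) and let PBLENDVB, which selects on the
+    // sign bit, apply the shift one bit at a time (by 4, then 2, then 1),
+    // doubling 'a' between steps.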
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ return R;
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+    // An add of one will be selected as an INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+ }
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+}
+
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ if (Subtarget->hasSSE2() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
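+    // sign_extend_inreg of a vector is lowered as a shift left by the number
+    // of unused upper bits followed by an arithmetic shift right by the same
+    // amount, which replicates the sign bit of the ExtraVT value.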
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i64: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+ SRAIntrinsicsID = 0;
+ break;
+ }
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In case of 1 bit sext, no need to shr
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ if (SRAIntrinsicsID) {
+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+ return Tmp1;
+ }
+
+ return SDValue();
+}
+
+
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
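+    // Without SSE2 there is no mfence instruction, so use a locked OR of zero
+    // into the word at the top of the stack as a full barrier instead.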
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
+ EVT T = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
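+  // CMPXCHG expects the comparand in the accumulator (AL/AX/EAX/RAX) and
+  // leaves the previous memory value there, which is copied out below as the
+  // node's result.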
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BITCAST");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) const {
+ EVT T = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ EVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ EVT T = N->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
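+    // CMPXCHG8B takes the comparand in EDX:EAX and the replacement value in
+    // ECX:EBX, and returns the previous memory value in EDX:EAX; split the two
+    // i64 operands into 32-bit halves and route them accordingly.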
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
+ cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
+ cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
+ swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
+ MVT::i32, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
+ MVT::i32, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::CMPPD: return "X86ISD::CMPPD";
+ case X86ISD::CMPPS: return "X86ISD::CMPPS";
+ case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
+ case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
+ case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
+ case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
+ case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
+ case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
+ case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
+ case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::OR: return "X86ISD::OR";
+ case X86ISD::XOR: return "X86ISD::XOR";
+ case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
+ case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
+ case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
+ case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
+ return false;
+
+ if (AM.BaseGV) {
+ unsigned GVFlags =
+ Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+
+ // If a reference to this global requires an extra load, we can't fold it.
+ if (isGlobalStubReference(GVFlags))
+ return false;
+
+ // If BaseGV requires a register for the PIC base, we cannot also have a
+ // BaseReg specified.
+ if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
+ return false;
+
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ return false;
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
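+    // (A scale of 3, 5 or 9 is encoded as scalereg + {2,4,8}*scalereg, so the
+    // scaled register must also occupy the base-register slot.)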
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
+
+ // FIXME: pshufb, blends, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT) ||
+ isPSHUFLWMask(M, VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
+ isUNPCKLMask(M, VT) ||
+ isUNPCKHMask(M, VT) ||
+ isUNPCKL_v_undef_Mask(M, VT) ||
+ isUNPCKH_v_undef_Mask(M, VT));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT) ||
+ isCommutedSHUFPMask(Mask, VT));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned LoadOpc,
+ unsigned CXchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+  //   bnz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+  // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
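+  // The back edge is taken when the compare-exchange fails because another
+  // thread modified the location in the meantime; the load/op/cmpxchg sequence
+  // is retried until it succeeds.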
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ DebugLoc dl = bInstr->getDebugLoc();
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
+ }
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(EAXreg);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function: 64-bit atomics on a 32-bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
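+ // A note on the register pairing (descriptive only): CMPXCHG8B compares
+ // EDX:EAX against the 8-byte memory operand and, on success, stores
+ // ECX:EBX into it, so the current value is staged in EAX (low) / EDX (high)
+ // and the desired new value in EBX (low) / ECX (high) by the copies
+ // emitted below.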
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = bInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
+ "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
+ // x86 address has 5 operands: base, index, scale, displacement, and segment.
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
+ // add 4 to displacement.
+ for (int i=0; i <= lastAddrIndx-2; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+ (*MIB).addOperand(*argOpers[lastAddrIndx]);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ // The subsequent operations should use the destination registers of
+ // the PHI instructions.
+ if (invSrc) {
+ t1 = F->getRegInfo().createVirtualRegister(RC);
+ t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
+ } else {
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
+ }
+
+ int valArgIndx = lastAddrIndx + 1;
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(t1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+ assert(argOpers[valArgIndx + 1]->isReg() ==
+ argOpers[valArgIndx]->isReg());
+ assert(argOpers[valArgIndx + 1]->isImm() ==
+ argOpers[valArgIndx]->isImm());
+ if (argOpers[valArgIndx + 1]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(t2);
+ (*MIB).addOperand(*argOpers[valArgIndx + 1]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).setMemRefs(bInstr->memoperands_begin(),
+ bInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) const {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [min/max.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ //
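+ // As a concrete sketch, ATOMMIN32 passes cmovOpc = CMOVL32rr, so after the
+ // CMP below t3 receives t1 when t1 < t2 (signed) and t2 otherwise, i.e.
+ // the signed minimum, which LCMPXCHG32 then tries to store back.
+ //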
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = mInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // An x86 address has 5 operands: base, index, scale, displacement, and segment.
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate the conditional move (cmov).
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, dl, TII->get(cmovOpc), t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Compare and exchange if no other thread has modified the memory location.
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+ assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).setMemRefs(mInstr->memoperands_begin(),
+ mInstr->memoperands_end());
+
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// FIXME: When we get size-specific XMM0 registers, e.g. XMM0_V16I8
+// or XMM0_V32I8 in AVX, all of this code can be replaced with that
+// in the .td file.
+MachineBasicBlock *
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned Opc;
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
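+ // (The numArgs distinction mirrors the underlying instructions: the
+ // implicit-length PCMPISTRM forms take two vectors plus an immediate,
+ // while the explicit-length PCMPESTRM forms additionally involve the two
+ // string lengths, which the hardware reads from EAX and EDX; hence the
+ // extra operands and the implicit-register filtering below.)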
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
+ for (unsigned i = 0; i < numArgs; ++i) {
+ MachineOperand &Op = MI->getOperand(i+1);
+ if (!(Op.isReg() && Op.isImplicit()))
+ MIB.addOperand(Op);
+ }
+ BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ .addReg(X86::XMM0);
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
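+ // With the six integer registers alone MaxOffset is 48; when fp_offset is
+ // in use it becomes 48 + 8*16 = 176, matching the layout of the X86-64
+ // register save area (48 bytes of GPRs followed by the XMM slots).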
+
+ // Align ArgSize to a multiple of 8.
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ //   |     .
+ //   |        .
+ //   offsetMBB   overflowMBB
+ //   |        .
+ //   |     .
+ //        endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
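+ // Worked example: an 8-byte integer argument has ArgSizeA8 = 8 and
+ // MaxOffset = 48, so this compares gp_offset against 48; values of 48 or
+ // more mean every GPR slot is consumed and the overflow path is taken.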
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
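+ // (Worked example of the rounding above: with Align = 16 and an overflow
+ // address of 0x1008, adding 15 gives 0x1017 and masking with ~15 yields
+ // 0x1010, the next 16-byte boundary.)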
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit code to save XMM registers to the stack. The ABI says that the
+ // number of registers to save is given in %al, so it's theoretically
+ // possible to do an indirect jump trick to avoid saving all of them;
+ // however, this code takes a simpler approach and just executes all
+ // of the stores if %al is non-zero. It's less code, and it's probably
+ // easier on the hardware branch predictor, and stores aren't all that
+ // expensive anyway.
+
+ // Create the new basic blocks. One block contains all the XMM stores,
+ // and one block is the final destination regardless of whether any
+ // stores were performed.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *F = MBB->getParent();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+ MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, XMMSaveMBB);
+ F->insert(MBBIter, EndMBB);
+
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // The original block will now fall through to the XMM save block.
+ MBB->addSuccessor(XMMSaveMBB);
+ // The XMMSaveMBB will fall through to the end block.
+ XMMSaveMBB->addSuccessor(EndMBB);
+
+ // Now add the instructions.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned CountReg = MI->getOperand(0).getReg();
+ int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+ int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+ if (!Subtarget->isTargetWin64()) {
+ // If %al is 0, branch around the XMM save block.
+ BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+ BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ }
+
+ // In the XMM save block, save all the XMM argument registers.
+ for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+ MachineMemOperand *MMO =
+ F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
+ /*Size=*/16, /*Align=*/16);
+ BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ .addFrameIndex(RegSaveFrameIndex)
+ .addImm(/*Scale=*/1)
+ .addReg(/*IndexReg=*/0)
+ .addImm(/*Disp=*/Offset)
+ .addReg(/*Segment=*/0)
+ .addReg(MI->getOperand(i).getReg())
+ .addMemOperand(MMO);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+
+ return EndMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ const MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ BitVector ReservedRegs = TRI->getReservedRegs(*MF);
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS) continue;
+ copy0MBB->addLiveIn(Reg);
+ sinkMBB->addLiveIn(Reg);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ copy0MBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(!Subtarget->isTargetEnvMacho());
+
+ // The lowering is pretty easy: we're just emitting the call to _alloca. The
+ // non-trivial part is the implicit def of ESP.
+
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX(allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX holds the offset to be subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(!"TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // The defs of TCRETURNxx64 include Win64's callee-saved registers as a subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
+ case X86::CMOV_GR8:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ return EmitLoweredSelect(MI, BB);
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the control word...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+ // Set the rounding mode to round toward zero (truncation)...
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
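+ // (0xC7F sets the rounding-control field, bits 10-11 of the x87 control
+ // word, to 11b = round toward zero, and leaves all FP exceptions masked.)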
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of the control word to its original value
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+ // String/text processing lowering.
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ return EmitPCMP(MI, BB, 3, false /* memArg */);
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ return EmitPCMP(MI, BB, 3, true /* memArg */);
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ return EmitPCMP(MI, BB, 5, false /* memArg */);
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ return EmitPCMP(MI, BB, 5, true /* memArg */);
+
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
+ // Atomic Lowering.
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+
+ case X86::ATOMAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
+ X86::OR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMXOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
+ X86::XOR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMNAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass, true);
+ case X86::ATOMMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
+ case X86::ATOMMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
+ case X86::ATOMUMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
+ case X86::ATOMUMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+
+ case X86::ATOMAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
+ X86::OR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMXOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
+ X86::XOR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMNAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass, true);
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ // This group is for 64-bit host.
+ case X86::ATOMAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
+ X86::OR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMXOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
+ X86::XOR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMNAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass, true);
+ case X86::ATOMMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ case X86::ATOMMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
+ case X86::ATOMUMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ case X86::ATOMUMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
+ case X86::VASTART_SAVE_XMM_REGS:
+ return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ break;
+ }
+}
+
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ const GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+}
+
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue InputVector = N->getOperand(0);
+
+ // Only operate on vectors of 4 elements, where the alternative shuffling
+ // gets to be more expensive.
+ if (InputVector.getValueType() != MVT::v4i32)
+ return SDValue();
+
+ // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+ // single use which is a sign-extend or zero-extend, and all elements are
+ // used.
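+ //
+ // (Illustrative outcome: a v4i32 whose only uses are four extract +
+ // sext/zext pairs is stored once to a stack slot, and each lane is then
+ // re-loaded as a scalar i32 from slot + 4*index.)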
+ SmallVector<SDNode *, 4> Uses;
+ unsigned ExtractedElements = 0;
+ for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+ UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != InputVector.getResNo())
+ return SDValue();
+
+ SDNode *Extract = *UI;
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (Extract->getValueType(0) != MVT::i32)
+ return SDValue();
+ if (!Extract->hasOneUse())
+ return SDValue();
+ if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+ Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+ return SDValue();
+
+ // Record which element was extracted.
+ ExtractedElements |=
+ 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+ Uses.push_back(Extract);
+ }
+
+ // If not all the elements were used, this may not be worthwhile.
+ if (ExtractedElements != 15)
+ return SDValue();
+
+ // Ok, we've now decided to do the transformation.
+ DebugLoc dl = InputVector.getDebugLoc();
+
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Replace each use (extract) with a load of the appropriate element.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
+
+ // Compute the element's address.
+ SDValue Idx = Extract->getOperand(1);
+ unsigned EltSize =
+ InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+ uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ StackPtr, OffsetVal);
+
+ // Load the scalar.
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Replace the extract with the load.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+ }
+
+ // The replacement was made in place; don't return anything.
+ return SDValue();
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
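+ // (Background: MINSS/MAXSS return their second operand whenever the
+ // comparison is false, including the NaN and +0.0 vs -0.0 cases, which is
+ // why the NaN and signed-zero checks in the cases below are needed.)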
+ if (Subtarget->hasSSE2() &&
+ (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
+ Cond.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETULE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGE:
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGT:
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETULT:
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETOLE:
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
+ break;
+ std::swap(LHS, RHS);
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETULE:
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+ // If this is efficiently invertible, canonicalize the TrueC/FalseC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If the operand of BSR/BSF is proven to never be zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
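+
+// Editorial sketch (not part of the original combine): the integer identity
+// behind the pow2/0 case above. With TrueC == 8 and FalseC == 0, the CMOV
+// collapses to a zero-extended SETCC shifted left by log2(8). The helper
+// name below is hypothetical and only restates that equivalence in C++.
+static unsigned SelectPow2Reference(bool Cond) {
+ return (unsigned)Cond << 3; // == Cond ? 8 : 0
+}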
+
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ // If the second multiplier is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
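+
+// Editorial sketch (illustration only): the factor split PerformMulCombine
+// relies on, extracted as a standalone helper with a hypothetical name. For
+// example, x*45 splits into factors 9 and 5 (two LEAs) and x*40 splits into
+// factors 5 and 8 (an LEA plus a shift by 3).
+static bool DecomposeMulForLEA(uint64_t MulAmt, uint64_t &MulAmt1,
+ uint64_t &MulAmt2) {
+ MulAmt1 = MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) { MulAmt1 = 9; MulAmt2 = MulAmt / 9; }
+ else if ((MulAmt % 5) == 0) { MulAmt1 = 5; MulAmt2 = MulAmt / 5; }
+ else if ((MulAmt % 3) == 0) { MulAmt1 = 3; MulAmt2 = MulAmt / 3; }
+ // Both factors must be cheap: a power of two or one of 3, 5, 9.
+ return MulAmt2 != 0 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9);
+}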
+
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zero's or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
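+
+// Editorial sketch: why the (shl (and setcc_c, c1), c2) fold above is sound.
+// When v is either 0 or all-ones (which SETCC_CARRY guarantees), shifting the
+// masked value is the same as masking with the pre-shifted constant, i.e.
+// (v & c1) << c2 == v & (c1 << c2). The helper name is hypothetical.
+static uint32_t ShlOfMaskedAllOnes(uint32_t v /* 0 or ~0u */, uint32_t c1,
+ uint32_t c2) {
+ return v & (c1 << c2); // equals (v & c1) << c2 for v in {0, ~0u}
+}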
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
+ // On X86 with SSE2 support, we can transform this to a vector shift if
+ // all elements are shifted by the same amount. We can't do this in legalize
+ // because a constant vector is typically transformed to a constant pool,
+ // so we have no knowledge of the shift amount.
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ SDValue ShAmtOp = N->getOperand(1);
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+ SDValue BaseShAmt = SDValue();
+ if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != BaseShAmt) {
+ return SDValue();
+ }
+ }
+ } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+ SDValue InVec = ShAmtOp.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
+ } else
+ return SDValue();
+
+ // The shift amount is an i32.
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
+
+ // The shift amount is identical so we can do a vector shift.
+ SDValue ValOp = N->getOperand(0);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ break;
+ case ISD::SHL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRA:
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ }
+ return SDValue();
+}
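+
+// Editorial sketch: the semantics this combine targets, as a plain C++
+// reference with a hypothetical name. The DAG-level pattern is a vector
+// shift whose amount operand is a splat; when every lane is shifted by the
+// same amount, the whole operation maps onto one immediate-form SSE2 shift
+// (e.g. pslld for v4i32) instead of per-lane work.
+static void ShiftAllLanesLeft(uint32_t (&V)[4], unsigned Amt) {
+ for (unsigned i = 0; i != 4; ++i)
+ V[i] <<= Amt; // same splatted amount in every lane
+}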
+
+
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
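+
+// Editorial sketch: the C-level pattern CMPEQCombine is aimed at. An ordered
+// double equality is typically lowered to one UCOMISD feeding two SETCCs
+// (COND_E and COND_NP) that are ANDed together; the combine rewrites that
+// into a single CMPEQSD followed by AND 1. Hypothetical reference helper:
+static bool OrderedEqualReference(double A, double B) {
+ return A == B; // typically ucomisd + sete + setnp + and before the combine
+}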
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and the ANDNP.
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ unsigned Bits = VT.getSizeInBits();
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
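+
+// Editorial sketch: the scalar identity behind the SHLD fold above, valid
+// for shift amounts with 0 < C < 64 (the pattern requires the two amounts
+// to sum to the bit width). The helper name is hypothetical.
+static uint64_t Shld64Reference(uint64_t X, uint64_t Y, unsigned C) {
+ return (X << C) | (Y >> (64 - C)); // == SHLD64 X, Y, C for 0 < C < 64
+}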
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getPointerInfo().getWithOffset(4),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getPointerInfo().getWithOffset(4),
+ St->isVolatile(),
+ St->isNonTemporal(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
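+
+// Editorial sketch: the kind of source the i64 case above improves on 32-bit
+// x86 with SSE2. A plain 64-bit copy is a load feeding a store; lowering it
+// through an f64 keeps it to one load/store pair instead of two 32-bit GPR
+// pairs. The helper name is hypothetical.
+static void Copy64Reference(uint64_t *Dst, const uint64_t *Src) {
+ *Dst = *Src; // i64 load -> store; combined into an f64/SSE load + store
+}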
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ EVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+ VT.getVectorElementType().getSizeInBits() ==
+ OpVT.getVectorElementType().getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ }
+ return SDValue();
+}
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+ // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
+ // (and (i32 x86isd::setcc_carry), 1)
+ // This eliminates the zext. This transformation is necessary because
+ // ISD::SETCC is always legalized to i8.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() != X86ISD::SETCC_CARRY)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+
+ return SDValue();
+}
+
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
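+
+// Editorial sketch: what the SETCC_CARRY materialization above computes.
+// "sbb reg,reg" yields 0 or ~0 from the carry flag, and the trailing AND 1
+// recovers the same 0/1 value setb would have produced, without needing a
+// later zext. The helper name is hypothetical.
+static unsigned SetccCarryReference(bool CarryFlag) {
+ unsigned AllOnesOrZero = CarryFlag ? ~0u : 0u; // sbb reg,reg
+ return AllOnesOrZero & 1; // == (unsigned)CarryFlag
+}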
+
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
+ SDValue Op0 = N->getOperand(0);
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this
+ // when the flag result is dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
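+
+// Editorial sketch: the scalar semantics the adc/sbb folds above preserve.
+// CMP X, 1 sets CF exactly when X == 0, so "sbb -1, Y" computes
+// Y + 1 - CF == Y + (X != 0) and "adc 0, Y" computes Y + CF == Y + (X == 0).
+// The reference helpers below are hypothetical names.
+static uint64_t AddSetneReference(uint64_t X, uint64_t Y) {
+ return Y + (X != 0 ? 1 : 0); // matches the sbb -1, Y form
+}
+static uint64_t AddSeteReference(uint64_t X, uint64_t Y) {
+ return Y + (X == 0 ? 1 : 0); // matches the adc 0, Y form
+}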
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD:
+ case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
+ }
+
+ return SDValue();
+}
+
+/// isTypeDesirableForOp - Return true if the target has native support for
+/// the specified value type and it is 'desirable' to use the type for the
+/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+/// instruction encodings are longer and some i16 instructions are slow.
+bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+ if (!isTypeLegal(VT))
+ return false;
+ if (VT != MVT::i16)
+ return true;
+
+ switch (Opc) {
+ default:
+ return true;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
+}
+
+/// IsDesirableToPromoteOp - This method queries the target whether it is
+/// beneficial for dag combiner to promote the specified node. If true, it
+/// should return the desired promotion type by reference.
+bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+ EVT VT = Op.getValueType();
+ if (VT != MVT::i16)
+ return false;
+
+ bool Promote = false;
+ bool Commute = false;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If the non-extending load has a single use and it's not live out, then it
+ // might be folded.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
+ Op.hasOneUse()*/) {
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ // The only case where we'd want to promote LOAD (rather than having it
+ // promoted as an operand) is when its only use is a liveout.
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ }
+ }
+ Promote = true;
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Promote = true;
+ break;
+ case ISD::SHL:
+ case ISD::SRL: {
+ SDValue N0 = Op.getOperand(0);
+ // Look out for (store (shl (load), x)).
+ if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
+ return false;
+ Promote = true;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Commute = true;
+ // fallthrough
+ case ISD::SUB: {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (!Commute && MayFoldLoad(N1))
+ return false;
+ // Avoid disabling potential load folding opportunities.
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
+ return false;
+ if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
+ return false;
+ Promote = true;
+ }
+ }
+
+ PVT = MVT::i32;
+ return Promote;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType()->isIntegerTy(16) &&
+ AsmPieces.size() == 3 &&
+ (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ break;
+ case 3:
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+ Words[2] == "$0") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
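+
+// Editorial sketch: an inline-asm form that the single-statement "bswap $0"
+// match above is meant to accept, assuming the front end lowers the "+r"
+// constraint to the tied "=r,0" string and the %0 placeholder to $0.
+// Illustrative only; the helper name is hypothetical.
+static unsigned Bswap32ViaAsm(unsigned X) {
+ asm("bswap %0" : "+r"(X)); // reaches the matcher as asm "bswap $0", "=r,0"
+ return X; // ExpandInlineAsm rewrites the call to llvm.bswap.i32
+}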
+
+
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'f':
+ case 't':
+ case 'u':
+ case 'y':
+ case 'x':
+ case 'Y':
+ case 'l':
+ return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(EVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasXMMInt())
+ return "Y";
+ if (Subtarget->hasXMM())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'K':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // In any sort of PIC mode addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ const GlobalValue *GV = GA->getGlobal();
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
+ getTargetMachine())))
+ return;
+
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ // TODO: Slight differences here in allocation order and leaving
+ // RIP in the class. Do they matter any more here than they do
+ // in the normal allocation?
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ else if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ break;
+ }
+ // 32-bit fallthrough
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+ break;
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'R': // LEGACY_REGS
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
+ return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasXMMInt()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasXMM()) break;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // Map st(0) .. st(7) to the ST0 .. ST7 registers.
+ if (Constraint.size() == 7 && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 's' &&
+ tolower(Constraint[2]) == 't' &&
+ Constraint[3] == '(' &&
+ (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+ Constraint[5] == ')' &&
+ Constraint[6] == '}') {
+
+ Res.first = X86::ST0+Constraint[4]-'0';
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // GCC allows "st(0)" to be called just plain "st".
+ if (StringRef("{st}").equals_lower(Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RFP80RegisterClass;
+ return Res;
+ }
+
+ // flags -> EFLAGS
+ if (StringRef("{flags}").equals_lower(Constraint)) {
+ Res.first = X86::EFLAGS;
+ Res.second = X86::CCRRegisterClass;
+ return Res;
+ }
+
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GR32_ADRegisterClass;
+ return Res;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second == X86::GR16RegisterClass) {
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR64RegisterClass;
+ }
+ }
+ } else if (Res.second == X86::FR32RegisterClass ||
+ Res.second == X86::FR64RegisterClass ||
+ Res.second == X86::VR128RegisterClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+ if (VT == MVT::f32)
+ Res.second = X86::FR32RegisterClass;
+ else if (VT == MVT::f64)
+ Res.second = X86::FR64RegisterClass;
+ else if (X86::VR128RegisterClass->hasType(VT))
+ Res.second = X86::VR128RegisterClass;
+ }
+
+ return Res;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 000000000000..b6036782b865
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,955 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+ /// FSRL - Bitwise logical right shift of floating point values. This
+ /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// CALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these nodes are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ CALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
+ SETCC,
+
+ // Same as SETCC except it's materialized with an sbb and the value is all
+ // ones or all zeros.
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
+
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
+ MOVQ2DQ,
+
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+    /// of a destination 4 x float vector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW, MMX_PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// PBLENDVB - Variable blend
+ PBLENDVB,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+    // TLSCALL - Thread Local Storage. A call to an OS-provided thunk at
+    // the address from an earlier relocation.
+ TLSCALL,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ /// TC_RETURN - Tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL,
+
+    // CMPPD, CMPPS - Vector double/float comparison.
+ CMPPD, CMPPS,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+ PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
+ INC, DEC, OR, XOR, AND,
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM,
+
+ // PTEST - Vector bitwise comparisons
+ PTEST,
+
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ VUNPCKLPS,
+ VUNPCKLPD,
+ VUNPCKLPSY,
+ VUNPCKLPDY,
+ UNPCKHPS,
+ UNPCKHPD,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+
+ // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+ // according to %al. An operator is needed so that this can be expanded
+ // with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+    // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+    /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
+
+    // WARNING: Do not add anything at the end unless you want the node to
+    // have a memop! In fact, starting from ATOMADD64_DAG all opcodes will
+    // be treated as target memory ops!
+ };
+ }
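As a rough illustration of how these target opcodes get used during lowering, here is a condensed sketch; it is not the code this patch adds, and the real logic in X86ISelLowering.cpp handles many more cases:

  // Sketch: materialize "LHS == RHS" as an i8 0/1 value using X86ISD::CMP and
  // X86ISD::SETCC, following the operand layout documented above.
  // X86::COND_E is the condition-code enumerator from X86InstrInfo.h.
  SDValue LowerEqSketch(SDValue LHS, SDValue RHS, DebugLoc dl,
                        SelectionDAG &DAG) {
    SDValue Flags = DAG.getNode(X86ISD::CMP, dl, MVT::i32, LHS, RHS);
    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                       DAG.getConstant(X86::COND_E, MVT::i8), Flags);
  }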
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
+ bool isMOVLPMask(ShuffleVectorSDNode *N);
+
+  /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for MOVHP{S|D}
+  /// as well as MOVLHPS.
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+
+ /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PALIGNR.
+ bool isPALIGNRMask(ShuffleVectorSDNode *N);
+
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+ /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+ /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+ unsigned getShufflePALIGNRImmediate(SDNode *N);
+
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
+
+  /// isOffsetSuitableForCodeModel - Returns true if the given offset can
+  /// fit into the displacement field of the instruction.
+ bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+ bool hasSymbolicDisplacement = true);
+
+
+ /// isCalleePop - Determines whether the callee is required to pop its
+ /// own arguments. Callee pop is necessary to support tail calls.
+ bool isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt);
+ }
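These predicates are typically queried while lowering VECTOR_SHUFFLE nodes; a condensed example of the usual pattern (the surrounding call site and the Op value are assumed, not taken from this patch):

  // Sketch: pick a PSHUFD-style lowering when the shuffle mask qualifies.
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op.getNode());
  if (X86::isPSHUFDMask(SVOp)) {
    unsigned Imm = X86::getShuffleSHUFImmediate(SVOp);
    // ... build an X86ISD::PSHUFD node with Imm as its immediate operand ...
  }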
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
+ virtual const MCExpr *
+ LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned uid,
+ MCContext &Ctx) const;
+
+    /// getPICJumpTableRelocBase - Returns the relocation base for the given
+    /// PIC jumptable.
+ virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+ virtual const MCExpr *
+ getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const;
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+    /// function arguments in the caller parameter area. For X86, aggregates
+    /// that contain SSE vectors are placed at 16-byte boundaries while the
+    /// rest are at 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+    /// lowering. If DstAlign is zero, the destination alignment can satisfy
+    /// any constraint. Similarly, if SrcAlign is zero there is no need to
+    /// check it against an alignment requirement, probably because the
+    /// source does not need to be loaded. If
+ /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// non-scalar-integer type, e.g. empty string source, constant, or loaded
+ /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+ /// constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ virtual EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe, bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
+ /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses of the specified type.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+ return true;
+ }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
+
+    /// IsDesirableToPromoteOp - Return true if it is profitable for the DAG
+    /// combiner to promote the specified node, returning the desired
+    /// promotion type in PVT. e.g. On x86 i16 is legal, but promoting i16
+    /// operations to i32 is often desirable since i16 instruction encodings
+    /// are longer and some i16 instructions are slow.
+ virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+    /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicit zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
+    /// use this to indicate if there is a suitable VECTOR_SHUFFLE that can
+    /// be used to replace a VAND with a constant
+ /// pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ const X86Subtarget* getSubtarget() const {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
+ }
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const TargetData *TD;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ /// LegalFPImmediates - A list of legal fp immediates.
+ std::vector<APFloat> LegalFPImmediates;
+
+ /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// select the specified FP immediate natively.
+ void addLegalFPImmediate(const APFloat& Imm) {
+ LegalFPImmediates.push_back(Imm);
+ }
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ // Call lowering helpers.
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl) const;
+
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG &DAG) const;
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned) const;
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToBT(SDValue And, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual EVT
+ getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
+ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned NewOp) const;
+
+ /// Utility function to emit string processing sse4.2 instructions
+ /// that return in xmm0.
+ /// This takes the instruction to expand, the associated machine basic
+    /// block, the number of args, and whether or not the second arg is
+    /// in memory.
+ MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+ unsigned argNum, bool inMem) const;
+
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned loadOpc,
+ unsigned cxchgOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc = false) const;
+
+ MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc = false) const;
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc) const;
+
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit the xmm reg save portion of va_start.
+ MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) const;
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
+}
+
+#endif // X86ISELLOWERING_H
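To give a feel for a few of the hooks declared above, here is a small sketch of how client code might query them; the expected results follow from the inline definitions in this header, and the wrapper function is assumed for illustration:

  void QueryHooksSketch(const X86TargetLowering &TLI) {
    // X86 always reports unaligned accesses as allowed.
    bool Unaligned = TLI.allowsUnalignedMemoryAccesses(MVT::v4f32);
    // f80 constants are always worth shrinking, since fldt is very slow.
    bool ShrinkF80 = TLI.ShouldShrinkFPConstant(MVT::f80);
    // f32 is kept in SSE registers whenever SSE1 is enabled.
    bool ScalarSSE = TLI.isScalarFPTypeInSSEReg(MVT::f32);
    (void)Unaligned; (void)ShrinkF80; (void)ScalarSSE;
  }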
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 000000000000..dd4f6a5a85a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,102 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
+}
+
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
+
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
+}
+
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+ "prefetch $addr", []>;
+
+// FIXME: Disassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+ "prefetchw $addr", []>;
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
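For reference, the int_x86_3dnow_* intrinsics matched above are what the C-level 3DNow! intrinsics lower to. A small example, assuming clang's mm3dnow.h header and compilation with -m3dnow (neither is part of this change):

  #include <mm3dnow.h>

  // _m_pfadd compiles to the int_x86_3dnow_pfadd intrinsic, which the
  // PFADDrr/PFADDrm patterns defined above then select.
  __m64 add_packed_floats(__m64 a, __m64 b) {
    return _m_pfadd(a, b);
  }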
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 000000000000..9f7a4b06dc6f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1125 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+ // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This collects the relevant X86 information about a value
+/// type. For example, it can tell you which register class and preferred
+/// load node to use for a given type.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+  /// since the immediate field of i64 instructions is a 32-bit sign-extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+  /// i64i32imm, since the immediate field of i64 instructions is a 32-bit
+  /// sign-extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode. Operations on i8 are usually even; operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+  /// the REX.W prefix (a REX prefix byte with the W bit set). This is set
+  /// for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have the REX.W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+///    or 1 (for i16, i32, and i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
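+
+// Illustrative note (not part of the original file): with typeinfo = Xi16 and
+// opcode 0x00, an ITy instance ends up with encoded opcode 0x01 (the low bit
+// is replaced by HasOddOpcode), an asm string carrying the "{w}" suffix, and
+// the 0x66 operand-size prefix; with Xi64 the REX.W prefix is inferred
+// instead.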
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, reg", where the pattern has
+// just EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
+ areg.AsmName, ", $src}"), []> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+}
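+
+// Illustrative note (not part of the original file): the multiclasses below
+// instantiate BinOpAI as, e.g., BinOpAI<0x04, "add", Xi32, EAX>; ITy then
+// forces the low opcode bit for i32, giving opcode 0x05, i.e. the short
+// "add{l} $src, %eax" form that implicitly uses and defines EAX.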
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+    // NOTE: These are order-specific; we want the ri8 forms to be listed
+    // first so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+  // NOTE: These are order-specific; we want the mi8 forms to be listed
+  // first so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
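+
+// Illustrative note (not part of the original file): tblgen substitutes each
+// defm name above for #NAME#, so "defm ADD" produces ADD8rr..ADD64rr, the
+// _REV and rm forms, ADD16ri8/ADD32ri8/ADD64ri8, ADD8ri/ADD16ri/ADD32ri/
+// ADD64ri32, the mr/mi memory forms, and the accumulator forms
+// ADD8i8..ADD64i32; CMP gets the same names via ArithBinOp_F, but with
+// EFLAGS-only patterns.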
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar to AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 000000000000..0245e5c09644
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,184 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The add* functions below may be used with the BuildMI function to add entire
+// memory references in a single, typed function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier-to-use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP, with Disp adjusted accordingly. The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ int Disp;
+ const GlobalValue *GV;
+ unsigned GVOpFlags;
+
+ X86AddressMode()
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
+ Base.Reg = 0;
+ }
+
+
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
+};
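+
+// Hypothetical usage sketch (not part of the original header): build an
+// address mode for [EBP - 8] and append it with addFullAddress (defined
+// below) to a MachineInstrBuilder MIB:
+//   X86AddressMode AM;
+//   AM.Base.Reg = X86::EBP;  // BaseType defaults to RegBase, Scale to 1
+//   AM.Disp = -8;
+//   addFullAddress(MIB, AM);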
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
+}
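+
+// Hypothetical usage sketch (not part of the original header; MBB, MI, DL and
+// TII are assumed to be in scope): load from the address held in EBX into
+// EAX, i.e. "movl (%ebx), %eax":
+//   addDirectMem(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), X86::EAX),
+//                X86::EBX);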
+
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill, int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ }
+
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ else
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference uses the abstract FrameIndex as its base register until it is
+/// resolved to a real base register and offset. A constant offset can be
+/// specified as well.
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
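+
+// Hypothetical usage sketch (not part of the original header; MBB, MI, DL and
+// TII are assumed to be in scope): reload a value spilled at frame index FI
+// into EAX:
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), X86::EAX),
+//                     FI);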
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+static inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg, unsigned char OpFlags) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 000000000000..3a43b22ddf3d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// CMOV instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1 in {
+ def #NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ def #NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ def #NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ def #NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))]>, TB, OpSize;
+ def #NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ def #NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
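+
+// Illustrative note (not part of the original file): each "defm ... : CMOV<...>"
+// below expands #NAME# into register and memory forms; e.g. defm CMOVE
+// produces CMOVE16rr, CMOVE32rr, CMOVE64rr and CMOVE16rm, CMOVE32rm,
+// CMOVE64rm.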
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ } // Uses = [EFLAGS]
+}
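+
+// Illustrative note (not part of the original file): each "defm ... : SETCC<...>"
+// below yields a register and a memory form; e.g. defm SETB produces SETBr
+// and SETBm (SETBr is the instruction referenced by the setb patterns in
+// X86InstrCompiler.td).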
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;  // is sign bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is sign bit not set
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 000000000000..adcc747eb4b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1675 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+//      popl %destreg
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4K bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize an i64 constant whose top 32 bits are zero. This could
+// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
+// zero-extension; however, that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
+
+// (add OP, SETB) -> (adc OP, 0)
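+// (X86setcc_c X86_COND_B materializes CF as 0 or all-ones via sbb, so the
+// "and ..., 1" mask yields exactly CF; adding or subtracting that value is
+// therefore an adc or sbb with an immediate of 0, and subtracting the
+// unmasked 0/-1 value likewise folds to adc with 0, as in the patterns
+// below.)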
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack; on return, the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote;
+// however, that requires promoting the operands and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst"
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
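+// A LOCK-prefixed read-modify-write (typically of the top of the stack) can
+// serve as a full memory fence on targets without SSE2's mfence.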
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
+
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
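+// For example, for 'add' these are RegOpc = 0x00 (add r/m, r), ImmOpc = 0x80,
+// ImmOpc8 = 0x83, and ImmMod = MRM0m (the /0 opcode extension).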
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, OpSize, LOCK;
+def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+}
+
+}
+
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ isCodeGenOnly = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
+// not in the small code model, should use 'movabs'. FIXME: This is really a
+// hack; the 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with the small code model, we can get the address of a
+// label into a register with 'movl'. FIXME: This is a hack; the 'imm'
+// predicate of MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In the kernel code model, we can get the address of a label into a
+// register with 'movq'. FIXME: This is a hack; the 'imm' predicate of
+// MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// With the small code model and -static mode, it is safe to store global
+// addresses directly as immediates. FIXME: This is really a hack; the 'imm'
+// predicate for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// TLS: thread-pointer-relative (@tpoff) address computations.
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+ (TCRETURNri GR32_TC:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
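+// (e.g. 'testl %eax, %eax' encodes in 2 bytes, while 'cmpl $0, %eax' needs 3).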
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with a folded load, with the operands swapped and the
+// condition inverted.
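+// CMOV can only fold a load on its source operand, so the operands are
+// swapped and the inverted condition code is used to keep the same semantics.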
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
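+// For example, an i8 extload feeding a 64-bit use becomes a zero-extending
+// byte load, so no later instruction ever depends on stale upper bits.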
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register
+// are defined after an extload: a 32-bit load implicitly zeroes bits 63:32.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Almost any instruction that defines a 32-bit result zero-extends it up to
+// 64 bits. The exceptions that def32 must exclude: Truncate can be lowered to
+// EXTRACT_SUBREG, CopyFromReg may be copying from a truncate, and x86's cmov
+// doesn't write its destination if the condition is false. Any other 32-bit
+// operation is known to zero bits 63:32.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
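+// For example, (zext (i32 (add ...))) needs no separate zero-extend: the
+// 32-bit add already cleared bits 63:32, so SUBREG_TO_REG is sufficient.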
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
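+// For example, an 'or' with an immediate whose bits are known clear in the
+// other operand can be selected as ADD32ri8_DB; as an add it may later be
+// folded into an LEA, and otherwise it is emitted as a plain 'or'.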
+
+// Treat an 'or' node as an 'add' node if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order-specific: we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
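+// For example, 'addl $128, %eax' needs a 32-bit immediate, while
+// 'subl $-128, %eax' fits the sign-extended 8-bit immediate form.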
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit
+// 'and' with implicit zero-extension instead of a 64-bit 'and' if the
+// immediate has at least 32 bits of leading zeros. If, in addition, the low
+// 32 bits can be represented with a sign extension of an 8-bit constant, use
+// that form.
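+// For example, a 64-bit 'and' with 0x00000000ffff0000 cannot use a
+// sign-extended 32-bit immediate, but 'andl $0xffff0000, %eax' gives the same
+// result because the 32-bit operation zeroes bits 63:32.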
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
+ Requires<[In64BitMode]>;
+
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks (x86-64).
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
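+// (With a REX prefix the AH/BH/CH/DH encodings select SPL/BPL/SIL/DIL instead,
+// which is why the _NOREX instruction forms and the GR*_ABCD register classes
+// are used below.)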
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
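+// (In 64-bit mode the one-byte 0x40-0x4F inc/dec encodings are reused as REX
+// prefixes, so the INC64_*/DEC64_* forms use the longer ModRM encoding.)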
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 000000000000..c228a0aed59c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,304 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)]>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "retw\t$amt",
+ []>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "lretl", []>;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "lretq", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcxz/jecxz/jrcxz instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+  // These are the 32-bit versions of this instruction for the asmparser. In
+  // 32-bit mode, the form with the address-size prefix is jcxz and the
+  // unprefixed form is jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+  // J*CXZ instructions: 64-bit versions for the asmparser. In 64-bit mode,
+  // the form with the address-size prefix is jecxz and the unprefixed form
+  // is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions (32-bit)...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ Requires<[In32BitMode]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ Requires<[In32BitMode]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
+
+ // callw for 16-bit code, for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call pseudo instructions and tail jumps.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ // FIXME: These should be pseudo instructions that are lowered when going to
+ // MCInst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL",
+ []>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ "", []>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions (64-bit)...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[In64BitMode, NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
+ }
+
+ // FIXME: We need to teach codegen about a single list of call-clobbered
+ // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobbers R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobbers R10, R11, RAX and EFLAGS, and updates RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ }
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 000000000000..2e1d523ea16d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,151 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+
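+// Illustrative example of why GR32_NOREX matters: "movzbl %ah, %ecx" is legal,
+// but "movzbl %ah, %r8d" is not, because %ah is only addressable without a REX
+// prefix while %r8d requires one. Restricting the destination class keeps the
+// register allocator from producing the unencodable form.
+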
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension; however, this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
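+
+// A rough sketch of the Pat-pattern form the FIXME above alludes to, assuming
+// the sub_32bit sub-register index from X86RegisterInfo.td; shown here only as
+// commented-out TableGen:
+//   def : Pat<(i64 (zext GR8:$src)),
+//             (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
+//   def : Pat<(i64 (zext GR32:$src)),
+//             (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;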
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 000000000000..d868773d2d69
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
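+
+// As a rough guide to the nested multiclass expansion above: the line
+// "defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">" produces
+// register and memory forms named roughly VFMADDPSr132r, VFMADDPSr132m and the
+// 256-bit VFMADDPSr132rY / VFMADDPSr132mY (plus the 213/231 variants), with
+// mnemonics vfmadd132ps, vfmadd213ps and vfmadd231ps.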
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 000000000000..7cb870fabd62
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,648 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomInserter = 1 in { // Expanded after instruction selection.
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32"
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The fourth instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
+
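+// For instance, the fadd node is selected to the pseudos ADD_Fp32, ADD_Fp64
+// and ADD_Fp80 (defined via FPBinary_rr below), and the FP stackifier later
+// rewrites them to real FPI forms such as ADD_FST0r, ADD_FrST0 or ADD_F32m.
+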
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst),
+ (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
+ (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst),
+ (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+ OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
+}
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
+}
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
+def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Versions of FP instructions that take a single memory operand. Added for the
+// disassembler; to be removed once they are covered by patterns elsewhere.
+def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
+
+def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
+def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">;
+
+def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
+def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
+
+def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
+def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
+
+def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
+def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+
+def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
+def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
+
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+
+// Floating point cmovs.
+class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>,
+ Requires<[HasCMov]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
+
+let Predicates = [HasCMov] in {
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+} // Predicates = [HasCMov]
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1 in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+}
+let mayStore = 1 in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+}
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+let mayStore = 1 in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+ "fisttp{ll}\t$dst">;
+}
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+
+
+// Floating point compares.
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+
+// CC = ST(0) cmp ST(i)
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg">, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp">, DA;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t$reg">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucompi\t$reg">, DF;
+}
+
+def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcomi\t$reg">, DB;
+def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
+ "fcompi\t$reg">, DF;
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
+def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
+ "ffree\t$reg">, DD;
+
+// Clear exceptions
+
+def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
+
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+
+def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
+def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
+def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9;
+def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9;
+def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9;
+def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9;
+def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9;
+def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9;
+def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9;
+def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9;
+def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9;
+def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9;
+def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9;
+def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9;
+def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9;
+def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9;
+def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9;
+def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9;
+def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9;
+def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9;
+def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
+
+def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+ RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+ RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
+ RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to convert i64 to f64 since there isn't an SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+ Requires<[FPStackf64]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 000000000000..6d89bcc29e7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,535 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+def MRM_C1 : Format<33>;
+def MRM_C2 : Format<34>;
+def MRM_C3 : Format<35>;
+def MRM_C4 : Format<36>;
+def MRM_C8 : Format<37>;
+def MRM_C9 : Format<38>;
+def MRM_E8 : Format<39>;
+def MRM_F0 : Format<40>;
+def MRM_F8 : Format<41>;
+def MRM_F9 : Format<42>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm8PCRel : ImmType<2>;
+def Imm16 : ImmType<3>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class TF { bits<5> Prefix = 17; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr, Domain d = GenericDomain>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in an immediate field?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit has3DNow0F0FOpcode = 0; // Wacky 3DNow! encoding?
+
+ // TSFlags layout should be kept in sync with X86InstrInfo.h.
+ let TSFlags{5-0} = FormBits;
+ let TSFlags{6} = hasOpSizePrefix;
+ let TSFlags{7} = hasAdSizePrefix;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_i8ImmReg;
+ let TSFlags{37} = hasVEX_L;
+ let TSFlags{38} = has3DNow0F0FOpcode;
+}
+
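+// As an illustration of the packing above: an instruction tagged with the TB
+// and OpSize prefix classes ends up with TSFlags{6} = 1, TSFlags{12-8} = 1
+// (the TB prefix value) and its opcode byte in TSFlags{32-25}; the ad-hoc
+// machine code emitter reads these fields back when encoding.
+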
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+ let FPForm = fp;
+ let Pattern = pattern;
+}
+
+// Templates for instructions that use a 16- or 32-bit segmented address as
+// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
+//
+// Iseg16 - 16-bit segment selector, 16-bit offset
+// Iseg32 - 16-bit segment selector, 32-bit offset
+
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
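+
+// For illustration (hypothetical instantiation, not defined in this file): an
+// SI-derived scalar op carrying the XS prefix and no VEX bit resolves the !if
+// chain above to Predicates = [HasSSE1]; any other prefix yields [HasSSE2];
+// and hasVEXPrefix = 1 yields [HasAVX] together with a "v"-prefixed AsmString.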
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
+ Requires<[HasAVX]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
+ OpSize, Requires<[HasAVX]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. We put those instructions here because they fit
+// better into the SSSE3 instruction category than into the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
+
+// SS42FI - SSE 4.2 instructions with TF prefix.
+class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+
+// SS42AI - SSE 4.2 instructions with TA prefix.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
+
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefixes.
+// AVXAIi8 - AVX instructions with TA and OpSize prefixes and ImmT == Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
+// AES Instruction Templates:
+//
+// AES8I - AES instructions with T8 prefix.
+// AESAI - AES instructions with TA prefix and ImmT == Imm8.
+// These use the same encodings as the SSE4.2 T8 and TA templates.
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasAES]>;
+
+class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasAES]>;
+
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXRI - MMX instructions with TB prefix and REX.W.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
new file mode 100644
index 000000000000..b00109c9fa4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -0,0 +1,467 @@
+//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides pattern fragments useful for SIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86", SDTFPToIntOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
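+
+// Note on the MIOperandInfo above: the five sub-operands mirror the standard
+// five-part x86 memory reference used throughout this backend -- base
+// register, scale immediate, index register, displacement, and segment --
+// and the same five-part shape is what the '5' in the sse_load_f32/f64
+// ComplexPatterns above refers to.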
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+// 128-bit load pattern fragments
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// 256-bit load pattern fragments
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// 256-bit aligned load pattern fragments
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
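+
+// Usage sketch (hypothetical pattern, shown only as a comment): a packed
+// instruction can fold a memory source through one of the typed fragments
+// defined below, e.g. something of the form
+//   (v4f32 (X86fmin VR128:$src1, (memopv4f32 addr:$src2)))
+// which matches either a 16-byte-aligned load or, on subtargets reporting
+// hasVectorUAMem(), an unaligned one.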
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions, and those memory operands
+// aren't required to be aligned on a 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+// 128-bit bitconvert pattern fragments
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
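+
+// Worked example: a bit index of 19 becomes the byte immediate 2 (19 >> 3),
+// i.e. the addressed bit lives in the third byte of the operand.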
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
+// a PALIGNR imm.
+def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePALIGNRImmediate(N));
+}]>;
+
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
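+
+// Worked example: for a v8f32 vector, element index 4 names the upper 128-bit
+// half (each 128-bit lane holds four f32 elements), so both xforms above turn
+// that index into the immediate 1, while index 0 becomes immediate 0.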
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+def palign : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 000000000000..55b5835f52a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3240 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <limits>
+
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
+using namespace llvm;
+
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32)),
+ TM(tm), RI(tm, *this) {
+ enum {
+ TB_NOT_REVERSABLE = 1U << 31,
+ TB_FLAGS = TB_NOT_REVERSABLE
+ };
+
+ static const unsigned OpTbl2Addr[][2] = {
+ { X86::ADC32ri, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32mi8 },
+ { X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
+ { X86::ADD16ri, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
+ { X86::ADD32ri, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
+ { X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
+ { X86::ADD8ri, X86::ADD8mi },
+ { X86::ADD8rr, X86::ADD8mr },
+ { X86::AND16ri, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16mi8 },
+ { X86::AND16rr, X86::AND16mr },
+ { X86::AND32ri, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32mi8 },
+ { X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
+ { X86::AND8ri, X86::AND8mi },
+ { X86::AND8rr, X86::AND8mr },
+ { X86::DEC16r, X86::DEC16m },
+ { X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC64_16m },
+ { X86::DEC64_32r, X86::DEC64_32m },
+ { X86::DEC64r, X86::DEC64m },
+ { X86::DEC8r, X86::DEC8m },
+ { X86::INC16r, X86::INC16m },
+ { X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC64_16m },
+ { X86::INC64_32r, X86::INC64_32m },
+ { X86::INC64r, X86::INC64m },
+ { X86::INC8r, X86::INC8m },
+ { X86::NEG16r, X86::NEG16m },
+ { X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
+ { X86::NEG8r, X86::NEG8m },
+ { X86::NOT16r, X86::NOT16m },
+ { X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
+ { X86::NOT8r, X86::NOT8m },
+ { X86::OR16ri, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16mi8 },
+ { X86::OR16rr, X86::OR16mr },
+ { X86::OR32ri, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32mi8 },
+ { X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
+ { X86::OR8ri, X86::OR8mi },
+ { X86::OR8rr, X86::OR8mr },
+ { X86::ROL16r1, X86::ROL16m1 },
+ { X86::ROL16rCL, X86::ROL16mCL },
+ { X86::ROL16ri, X86::ROL16mi },
+ { X86::ROL32r1, X86::ROL32m1 },
+ { X86::ROL32rCL, X86::ROL32mCL },
+ { X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
+ { X86::ROL8r1, X86::ROL8m1 },
+ { X86::ROL8rCL, X86::ROL8mCL },
+ { X86::ROL8ri, X86::ROL8mi },
+ { X86::ROR16r1, X86::ROR16m1 },
+ { X86::ROR16rCL, X86::ROR16mCL },
+ { X86::ROR16ri, X86::ROR16mi },
+ { X86::ROR32r1, X86::ROR32m1 },
+ { X86::ROR32rCL, X86::ROR32mCL },
+ { X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
+ { X86::ROR8r1, X86::ROR8m1 },
+ { X86::ROR8rCL, X86::ROR8mCL },
+ { X86::ROR8ri, X86::ROR8mi },
+ { X86::SAR16r1, X86::SAR16m1 },
+ { X86::SAR16rCL, X86::SAR16mCL },
+ { X86::SAR16ri, X86::SAR16mi },
+ { X86::SAR32r1, X86::SAR32m1 },
+ { X86::SAR32rCL, X86::SAR32mCL },
+ { X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
+ { X86::SAR8r1, X86::SAR8m1 },
+ { X86::SAR8rCL, X86::SAR8mCL },
+ { X86::SAR8ri, X86::SAR8mi },
+ { X86::SBB32ri, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32mi8 },
+ { X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
+ { X86::SHL16rCL, X86::SHL16mCL },
+ { X86::SHL16ri, X86::SHL16mi },
+ { X86::SHL32rCL, X86::SHL32mCL },
+ { X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
+ { X86::SHL8rCL, X86::SHL8mCL },
+ { X86::SHL8ri, X86::SHL8mi },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL },
+ { X86::SHLD16rri8, X86::SHLD16mri8 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL },
+ { X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
+ { X86::SHR16r1, X86::SHR16m1 },
+ { X86::SHR16rCL, X86::SHR16mCL },
+ { X86::SHR16ri, X86::SHR16mi },
+ { X86::SHR32r1, X86::SHR32m1 },
+ { X86::SHR32rCL, X86::SHR32mCL },
+ { X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
+ { X86::SHR8r1, X86::SHR8m1 },
+ { X86::SHR8rCL, X86::SHR8mCL },
+ { X86::SHR8ri, X86::SHR8mi },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL },
+ { X86::SHRD16rri8, X86::SHRD16mri8 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL },
+ { X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
+ { X86::SUB16ri, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16mi8 },
+ { X86::SUB16rr, X86::SUB16mr },
+ { X86::SUB32ri, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32mi8 },
+ { X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
+ { X86::SUB8ri, X86::SUB8mi },
+ { X86::SUB8rr, X86::SUB8mr },
+ { X86::XOR16ri, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16mi8 },
+ { X86::XOR16rr, X86::XOR16mr },
+ { X86::XOR32ri, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32mi8 },
+ { X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
+ { X86::XOR8ri, X86::XOR8mi },
+ { X86::XOR8rr, X86::XOR8mr }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i][0];
+ unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+ assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 0, folded load and store, no alignment requirement.
+ unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
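+
+ // Illustrative decode of AuxInfo (consumer-side sketch; the variable names
+ // here are for exposition only):
+ //   unsigned OpNum   =  AuxInfo & 0xf;      // operand index being folded
+ //   bool FoldedLoad  = (AuxInfo >> 4) & 1;  // bit 4: a load was folded
+ //   bool FoldedStore = (AuxInfo >> 5) & 1;  // bit 5: a store was folded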
+
+ // The third value is 1 when the folded memory operand is a load and 0 when
+ // it is a store; the fourth value is the required memory-operand alignment.
+ static const unsigned OpTbl0[][4] = {
+ { X86::BT16ri8, X86::BT16mi8, 1, 0 },
+ { X86::BT32ri8, X86::BT32mi8, 1, 0 },
+ { X86::BT64ri8, X86::BT64mi8, 1, 0 },
+ { X86::CALL32r, X86::CALL32m, 1, 0 },
+ { X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
+ { X86::CMP16ri, X86::CMP16mi, 1, 0 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
+ { X86::CMP16rr, X86::CMP16mr, 1, 0 },
+ { X86::CMP32ri, X86::CMP32mi, 1, 0 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
+ { X86::CMP32rr, X86::CMP32mr, 1, 0 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
+ { X86::CMP64rr, X86::CMP64mr, 1, 0 },
+ { X86::CMP8ri, X86::CMP8mi, 1, 0 },
+ { X86::CMP8rr, X86::CMP8mr, 1, 0 },
+ { X86::DIV16r, X86::DIV16m, 1, 0 },
+ { X86::DIV32r, X86::DIV32m, 1, 0 },
+ { X86::DIV64r, X86::DIV64m, 1, 0 },
+ { X86::DIV8r, X86::DIV8m, 1, 0 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1, 0 },
+ { X86::IDIV32r, X86::IDIV32m, 1, 0 },
+ { X86::IDIV64r, X86::IDIV64m, 1, 0 },
+ { X86::IDIV8r, X86::IDIV8m, 1, 0 },
+ { X86::IMUL16r, X86::IMUL16m, 1, 0 },
+ { X86::IMUL32r, X86::IMUL32m, 1, 0 },
+ { X86::IMUL64r, X86::IMUL64m, 1, 0 },
+ { X86::IMUL8r, X86::IMUL8m, 1, 0 },
+ { X86::JMP32r, X86::JMP32m, 1, 0 },
+ { X86::JMP64r, X86::JMP64m, 1, 0 },
+ { X86::MOV16ri, X86::MOV16mi, 0, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
+ { X86::MUL16r, X86::MUL16m, 1, 0 },
+ { X86::MUL32r, X86::MUL32m, 1, 0 },
+ { X86::MUL64r, X86::MUL64m, 1, 0 },
+ { X86::MUL8r, X86::MUL8m, 1, 0 },
+ { X86::SETAEr, X86::SETAEm, 0, 0 },
+ { X86::SETAr, X86::SETAm, 0, 0 },
+ { X86::SETBEr, X86::SETBEm, 0, 0 },
+ { X86::SETBr, X86::SETBm, 0, 0 },
+ { X86::SETEr, X86::SETEm, 0, 0 },
+ { X86::SETGEr, X86::SETGEm, 0, 0 },
+ { X86::SETGr, X86::SETGm, 0, 0 },
+ { X86::SETLEr, X86::SETLEm, 0, 0 },
+ { X86::SETLr, X86::SETLm, 0, 0 },
+ { X86::SETNEr, X86::SETNEm, 0, 0 },
+ { X86::SETNOr, X86::SETNOm, 0, 0 },
+ { X86::SETNPr, X86::SETNPm, 0, 0 },
+ { X86::SETNSr, X86::SETNSm, 0, 0 },
+ { X86::SETOr, X86::SETOm, 0, 0 },
+ { X86::SETPr, X86::SETPm, 0, 0 },
+ { X86::SETSr, X86::SETSm, 0, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+ { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
+ { X86::TEST16ri, X86::TEST16mi, 1, 0 },
+ { X86::TEST32ri, X86::TEST32mi, 1, 0 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
+ { X86::TEST8ri, X86::TEST8mi, 1, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
+ unsigned FoldedLoad = OpTbl0[i][2];
+ unsigned Align = OpTbl0[i][3];
+ assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 0, folded load or store.
+ unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+
+ static const unsigned OpTbl1[][3] = {
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, 16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
+ { X86::RCPPSr, X86::RCPPSm, 16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, 16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
+ { X86::SQRTPSr, X86::SQRTPSm, 16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i][0];
+ unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
+ unsigned Align = OpTbl1[i][2];
+ assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+ RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 1, folded load
+ unsigned AuxInfo = 1 | (1 << 4);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+
+ static const unsigned OpTbl2[][3] = {
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, 16 },
+ { X86::ADDPSrr, X86::ADDPSrm, 16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
+ { X86::ANDPDrr, X86::ANDPDrm, 16 },
+ { X86::ANDPSrr, X86::ANDPSrm, 16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, 16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, 16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, 16 },
+ { X86::DIVPSrr, X86::DIVPSrm, 16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
+ { X86::FsORPDrr, X86::FsORPDrm, 16 },
+ { X86::FsORPSrr, X86::FsORPSrm, 16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
+ { X86::HADDPDrr, X86::HADDPDrm, 16 },
+ { X86::HADDPSrr, X86::HADDPSrm, 16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, 16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
+ { X86::MAXPSrr, X86::MAXPSrm, 16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, 16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
+ { X86::MINPSrr, X86::MINPSrm, 16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, 16 },
+ { X86::MULPSrr, X86::MULPSrm, 16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, 16 },
+ { X86::ORPSrr, X86::ORPSrm, 16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
+ { X86::PADDBrr, X86::PADDBrm, 16 },
+ { X86::PADDDrr, X86::PADDDrm, 16 },
+ { X86::PADDQrr, X86::PADDQrm, 16 },
+ { X86::PADDSBrr, X86::PADDSBrm, 16 },
+ { X86::PADDSWrr, X86::PADDSWrm, 16 },
+ { X86::PADDWrr, X86::PADDWrm, 16 },
+ { X86::PANDNrr, X86::PANDNrm, 16 },
+ { X86::PANDrr, X86::PANDrm, 16 },
+ { X86::PAVGBrr, X86::PAVGBrm, 16 },
+ { X86::PAVGWrr, X86::PAVGWrm, 16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
+ { X86::PMINSWrr, X86::PMINSWrm, 16 },
+ { X86::PMINUBrr, X86::PMINUBrm, 16 },
+ { X86::PMULDQrr, X86::PMULDQrm, 16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
+ { X86::PMULHWrr, X86::PMULHWrm, 16 },
+ { X86::PMULLDrr, X86::PMULLDrm, 16 },
+ { X86::PMULLWrr, X86::PMULLWrm, 16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
+ { X86::PORrr, X86::PORrm, 16 },
+ { X86::PSADBWrr, X86::PSADBWrm, 16 },
+ { X86::PSLLDrr, X86::PSLLDrm, 16 },
+ { X86::PSLLQrr, X86::PSLLQrm, 16 },
+ { X86::PSLLWrr, X86::PSLLWrm, 16 },
+ { X86::PSRADrr, X86::PSRADrm, 16 },
+ { X86::PSRAWrr, X86::PSRAWrm, 16 },
+ { X86::PSRLDrr, X86::PSRLDrm, 16 },
+ { X86::PSRLQrr, X86::PSRLQrm, 16 },
+ { X86::PSRLWrr, X86::PSRLWrm, 16 },
+ { X86::PSUBBrr, X86::PSUBBrm, 16 },
+ { X86::PSUBDrr, X86::PSUBDrm, 16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
+ { X86::PSUBWrr, X86::PSUBWrm, 16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
+ { X86::PXORrr, X86::PXORrm, 16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, 16 },
+ { X86::SUBPSrr, X86::SUBPSrm, 16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, 16 },
+ { X86::XORPSrr, X86::XORPSrm, 16 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i][0];
+ unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
+ unsigned Align = OpTbl2[i][2];
+
+ assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+ RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversible operation (because there is a many->one
+ // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
+ // Index 2, folded load
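+ // (The low bits of AuxInfo hold the index of the folded operand; bit 4
+ // marks that the folded operand is a load.)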
+ unsigned AuxInfo = 2 | (1 << 4);
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ }
+}
+
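+/// isCoalescableExtInstr - Return true if MI is a register-to-register
+/// sign- or zero-extension whose result can be coalesced with its source,
+/// reporting the source register, the destination register, and the
+/// sub-register index of the source value within the destination.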
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8 bits of the larger
+ // register in 32-bit mode.
+ return false;
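+ // Fall through.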
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = X86::sub_8bit;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = X86::sub_16bit;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = X86::sub_32bit;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and following operands form a reference to the stack frame.
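+/// The reference must be a simple frame index: scale 1, no index register,
+/// and zero displacement.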
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
+ return MI->getOperand(X86::AddrNumOperands).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// regIsPICBase - Return true if the register is a PIC base register, i.e.
+/// it is defined by X86::MOVPC32r.
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// a few instructions in each direction it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
+ // It's always safe to clobber EFLAGS at the end of a block.
+ if (I == E)
+ return true;
+
+ // To keep compile time reasonable, if we are not able to determine the
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
+
+ // If we make it to the end of the block, it's safe to clobber EFLAGS.
+ if (Iter == E)
+ return true;
+ }
+
+ MachineBasicBlock::iterator B = MBB.begin();
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == B)
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ DebugLoc DL = Orig->getDebugLoc();
+
+ // MOV32r0 etc. are implemented with xor, which clobbers the condition
+ // codes. Re-materialize them as movri instructions to avoid side effects.
+ bool Clone = true;
+ unsigned Opc = Orig->getOpcode();
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ switch (Opc) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
+ }
+ Clone = false;
+ }
+ break;
+ }
+ }
+
+ if (Clone) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MBB.insert(I, MI);
+ } else {
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled; it uses a 32-bit LEA to form 3-address code by
+/// promoting to a 32-bit superregister and then truncating back down to a
+/// 16-bit subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ // This has the potential to cause a partial register stall, e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028: the two source operands are the same,
+ // so only a single insert_subreg is needed.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ // All instructions handled here are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEAs are really slow on Athlons, but not bad on P4s. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
+ bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+ // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0).addReg(0);
+ break;
+ }
+ default: {
+ // The following opcodes also set the condition code register(s). Only
+ // convert them to an equivalent LEA if the condition code register defs
+ // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - A few instructions need special handling in order
+/// to be commuted.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr:
+ case X86::CMOVB32rr:
+ case X86::CMOVB64rr:
+ case X86::CMOVAE16rr:
+ case X86::CMOVAE32rr:
+ case X86::CMOVAE64rr:
+ case X86::CMOVE16rr:
+ case X86::CMOVE32rr:
+ case X86::CMOVE64rr:
+ case X86::CMOVNE16rr:
+ case X86::CMOVNE32rr:
+ case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr:
+ case X86::CMOVBE32rr:
+ case X86::CMOVBE64rr:
+ case X86::CMOVA16rr:
+ case X86::CMOVA32rr:
+ case X86::CMOVA64rr:
+ case X86::CMOVL16rr:
+ case X86::CMOVL32rr:
+ case X86::CMOVL64rr:
+ case X86::CMOVGE16rr:
+ case X86::CMOVGE32rr:
+ case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr:
+ case X86::CMOVLE32rr:
+ case X86::CMOVLE64rr:
+ case X86::CMOVG16rr:
+ case X86::CMOVG32rr:
+ case X86::CMOVG64rr:
+ case X86::CMOVS16rr:
+ case X86::CMOVS32rr:
+ case X86::CMOVS64rr:
+ case X86::CMOVNS16rr:
+ case X86::CMOVNS32rr:
+ case X86::CMOVNS64rr:
+ case X86::CMOVP16rr:
+ case X86::CMOVP32rr:
+ case X86::CMOVP64rr:
+ case X86::CMOVNP16rr:
+ case X86::CMOVNP32rr:
+ case X86::CMOVNP64rr:
+ case X86::CMOVO16rr:
+ case X86::CMOVO32rr:
+ case X86::CMOVO64rr:
+ case X86::CMOVNO16rr:
+ case X86::CMOVNO32rr:
+ case X86::CMOVNO64rr: {
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MCID.isBranch() && !MCID.isBarrier())
+ return true;
+ if (!MCID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // Handle conditional branches.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient.
+ // We conditionally jump to the fall-through block.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ if (OldBranchCode == BranchCode)
+ continue;
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
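+ // (The COND_NP_OR_E / COND_NE_OR_P pairs arise from floating-point
+ // equality comparisons, where both ZF and PF have to be checked.)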
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != X86::JMP_4 &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+// Try to copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return;
+ }
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
+}
+
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ switch (RC->getSize()) {
+ default:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case 16:
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ // If the stack is realigned we can use aligned loads and stores.
+ if (isStackAligned)
+ return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ else
+ return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If the stack is realigned we can use aligned loads and stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ }
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
+
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ NewMIs.push_back(MIB);
+}
+
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
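+// FuseInst - Clone MI with the new opcode Opcode, replacing register operand
+// OpNo with the address operands in MOs.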
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
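+// MakeM0Inst - Build a memory form of a "move 0" instruction: the address
+// operands in MOs followed by an immediate 0 operand.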
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Align) const {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
+ MachineInstr *NewMI = NULL;
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it elsewhere. It requires
+ // replacing the *two* registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ unsigned Opcode = I->second.first;
+ unsigned MinAlign = I->second.second;
+ if (Align < MinAlign)
+ return NULL;
+ bool NarrowToMOV32rm = false;
+ if (Size) {
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
+ if (Size < RCSize) {
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+ return NULL;
+ // If this is a 64-bit load, but the spill slot is only 32 bits, then we
+ // can do a 32-bit load which is implicitly zero-extended. This is likely
+ // due to live interval analysis rematerializing a load from a stack slot.
+ if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+ return NULL;
+ Opcode = X86::MOV32rm;
+ NarrowToMOV32rm = true;
+ }
+ }
+
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+ if (NarrowToMOV32rm) {
+ // This is the special case where we use a MOV32rm to load a 32-bit
+ // value and zero-extend the top bits; change the destination register
+ // to a 32-bit one.
+ unsigned DstReg = NewMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+ X86::sub_32bit));
+ else
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
+ }
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing && !MI->isCopy())
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ unsigned RCSize = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
+ }
+ // Check if it's safe to fold the load. If the size of the object is
+ // narrower than the load width, then it's not.
+ if (Size < RCSize)
+ return NULL;
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+ else
+ switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
+ case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ Alignment = 16;
+ break;
+ case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
+ Alignment = 8;
+ break;
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
+ Alignment = 4;
+ break;
+ default:
+ return 0;
+ }
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
+ switch (LoadMI->getOpcode()) {
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
+ case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS: {
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // The medium and large code models can't fold loads this way.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Kernel)
+ return NULL;
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ PICBase = X86::RIP;
+ else
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+ }
+
+ // Create a constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ const Type *Ty;
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ Ty = Type::getFloatTy(MF.getFunction()->getContext());
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+ const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ break;
+ }
+ default: {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ break;
+ }
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return 0;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it anywhere else. It requires
+ // replacing *both* registers with the memory location.
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
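+ // The table value packs the register-form opcode in the first element and,
+ // in the second, the memory operand index in the low four bits plus
+ // folded-load (bit 4) and folded-store (bit 5) flags.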
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToAddr will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()) |
+ getUndefRegState(MO.isUndef()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri8:
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ unsigned NumDefs = MCID.NumDefs;
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (FoldedLoad) {
+ EVT VT = *RC->vt_begin();
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<EVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (MCID.getNumDefs() > 0) {
+ DstRC = getRegClass(MCID, 0, &RI);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+
+ // Preserve memory reference information.
+ cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & 0xf;
+ return I->second.first;
+}
+
+bool
+X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const {
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ switch (Opc1) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+ switch (Opc2) {
+ default: return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ break;
+ }
+
+ // Check if chain operands and base addresses match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(5) != Load2->getOperand(5))
+ return false;
+ // Segment operands should match as well.
+ if (Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+ // Scale should be 1, Index should be Reg0.
+ if (Load1->getOperand(1) == Load2->getOperand(1) &&
+ Load1->getOperand(2) == Load2->getOperand(2)) {
+ if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
+ return false;
+
+ // Now let's examine the displacements.
+ if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
+ isa<ConstantSDNode>(Load2->getOperand(3))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
+ return true;
+ }
+ }
+ return false;
+}
+
+bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1);
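+ // Only cluster loads whose displacements differ by roughly 512 bytes or
+ // less (the check below rejects gaps larger than 64 eight-byte chunks).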
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ unsigned Opc1 = Load1->getMachineOpcode();
+ unsigned Opc2 = Load2->getMachineOpcode();
+ if (Opc1 != Opc2)
+ return false; // FIXME: overly conservative?
+
+ switch (Opc1) {
+ default: break;
+ case X86::LD_Fp32m:
+ case X86::LD_Fp64m:
+ case X86::LD_Fp80m:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ return false;
+ }
+
+ EVT VT = Load1->getValueType(0);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // XMM registers. In 64-bit mode we can be a bit more aggressive since we
+ // have 16 of them to play with.
+ if (TM.getSubtargetImpl()->is64Bit()) {
+ if (NumLoads >= 3)
+ return false;
+ } else if (NumLoads) {
+ return false;
+ }
+ break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ if (NumLoads)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+
+/// isX86_64ExtendedReg - Is the given register an x86-64 extended (r8 or
+/// higher) register? e.g. r8, xmm8, xmm13, etc.
+bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
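+ // 0xe == 0b1110: if the opcode has an entry in ReplaceableInstrs it may be
+ // reassigned to any of the three domains (bits 1-3); otherwise no
+ // alternative domains are available.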
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
+}
+
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
+ // Insert the instruction that sets GlobalBaseReg into the first MBB of the
+ // function.
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = GlobalBaseReg;
+
+ // The operand of MovePCtoStack is completely ignored by the asm printer.
+ // It's only used in JIT code emission as a displacement to the pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to the pc, but to the external symbol _GLOBAL_OFFSET_TABLE_.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 000000000000..5f2eba34ac45
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,886 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
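+
+ // For illustration, a minimal sketch of reading a memory reference from a
+ // MachineInstr MI whose first address operand index is MemOp (a
+ // hypothetical index):
+ //   MI->getOperand(MemOp + X86::AddrBaseReg)    // base register
+ //   MI->getOperand(MemOp + X86::AddrScaleAmt)   // scale immediate
+ //   MI->getOperand(MemOp + X86::AddrIndexReg)   // index register
+ //   MI->getOperand(MemOp + X86::AddrDisp)       // displacement
+ //   MI->getOperand(MemOp + X86::AddrSegmentReg) // segment register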
+
+
+ // X86 specific condition codes. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in sync.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+ /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
+ };
+}
+
+/// isGlobalStubReference - Return true if the specified TargetFlag operand is
+/// a reference to a stub for a global, not the global itself.
+inline static bool isGlobalStubReference(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_DLLIMPORT: // dllimport stub.
+ case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOT: // normal GOT reference.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref.
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isGlobalRelativeToPICBase - Return true if the specified global value
+/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this
+/// is true, the addressing mode has the PIC base register added in (e.g. EBX).
+inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
+ switch (TargetFlag) {
+ case X86II::MO_GOTOFF: // isPICStyleGOT: local global.
+ case X86II::MO_GOT: // isPICStyleGOT: other global.
+ case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ //// MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AdSize - Set if this instruction requires an address size prefix (0x67),
+ // which most often indicates that the instruction uses 16 bit addresses
+ // instead of 32 bit addresses (or 32 bit addresses in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 17 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only care about the REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+ /// VEX_W - Has an opcode specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in the VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+ /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 3,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+ /// field marked when using f256 memory references.
+ VEX_L = 1U << 4,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+ /// we handle this by storing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 5
+ };
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ static inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ return -1;
+ }
+ }
+}
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public X86GenInstrInfo {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ ///
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
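+ // Each entry maps a register-form opcode to a (memory-form opcode,
+ // minimum operand alignment required to fold) pair.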
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
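+ // Each entry maps a memory-form opcode to a (register-form opcode, packed
+ // value) pair, where the packed value holds the memory operand index in its
+ // low four bits and folded-load / folded-store flags in bits 4 and 5.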
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot and the machine mem operand containing
+ /// the reference. If not, return false. Unlike
+ /// isLoadFromStackSlot, this returns true for any instruction that
+ /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+ /// the stack slot being stored to and the machine mem operand containing
+ /// the reference. If not, return false. Unlike isStoreToStackSlot,
+ /// this returns true for any instruction that stores to the
+ /// stack. This is a hint only and may not catch all cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ MachineInstr::mmo_iterator MMOBegin,
+ MachineInstr::mmo_iterator MMOEnd,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ virtual
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return the new instruction, otherwise it should return null.
+ /// If it folds the instruction, it is likely that the MachineInstr the
+ /// iterator references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+ /// canFoldMemoryOperand - Returns true if folding of the specified load /
+ /// store is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+ /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+ /// a store or a load and a store into two or more instructions. If this is
+ /// possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+ /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+ /// instruction after the load / store is unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible. If LoadRegIndex is non-null, it is filled in with the index of
+ /// the operand which will hold the register containing the loaded value.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
+ /// to determine if two loads are loading from the same base address. It
+ /// should only return true if the base pointers are the same and the
+ /// only difference between the two addresses is the offset. It also returns
+ /// the offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+
+ /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+ /// be scheduled together. On some targets, if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
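The two load-clustering hooks above are meant to be used together; a minimal sketch of that pairing follows, assuming the surrounding pre-regalloc scheduler code, with shouldCluster and NumClustered being hypothetical names.

// Hypothetical scheduler fragment (not in-tree code): cluster two load SDNodes
// only when both hooks agree. NumClustered is whatever count of already
// clustered loads the caller tracks.
static bool shouldCluster(const X86InstrInfo *TII, SDNode *Load1,
                          SDNode *Load2, unsigned NumClustered) {
  int64_t Off1 = 0, Off2 = 0;
  return TII->areLoadsFromSameBasePtr(Load1, Load2, Off1, Off2) &&
         TII->shouldScheduleLoadsNear(Load1, Load2, Off1, Off2, NumClustered);
}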
+
+ virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+
+ static bool isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ return isX86_64ExtendedReg(MO.getReg());
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ static bool isX86_64ExtendedReg(unsigned RegNo);
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+ /// and a bitmask of possible arguments to SetSSEDomain as the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
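A hedged sketch of how the two domain accessors above might be driven; the reading of the second element as a bit-per-domain mask follows the comment above but is an assumption, and trySwitchDomain / Desired are hypothetical names.

// Hypothetical use (not in-tree code): switch MI into execution domain Desired
// when the returned mask indicates an equivalent opcode exists there
// (assuming bit i set means SetSSEDomain(MI, i) is legal).
static void trySwitchDomain(const X86InstrInfo *TII, MachineInstr *MI,
                            unsigned Desired) {
  std::pair<uint16_t, uint16_t> Dom = TII->GetSSEDomain(MI);
  if (Dom.first != Desired && (Dom.second & (1u << Desired)))
    TII->SetSSEDomain(MI, Desired);
}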
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool isHighLatencyDef(int opc) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
+private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and the following operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 000000000000..7eb07b0a97bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,1648 @@
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86SetCC_C : SDTypeProfile<1, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+ SDTCisVT<1, iPTR>,
+ SDTCisVT<2, iPTR>]>;
+
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86Void : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86vastart_save_xmm_regs :
+ SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+ SDT_X86VASTART_SAVE_XMM_REGS,
+ [SDNPHasChain, SDNPVariadic]>;
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
+// the index operand of an address, to conform to x86 encoding restrictions.
+def ptr_rc_nosp : PointerLikeRegClass<1>;
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86MemAsmOperand];
+}
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def opaque32mem : X86MemOperand<"printopaquemem">;
+def opaque48mem : X86MemOperand<"printopaquemem">;
+def opaque80mem : X86MemOperand<"printopaquemem">;
+def opaque512mem : X86MemOperand<"printopaquemem">;
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def i256mem : X86MemOperand<"printi256mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+def f256mem : X86MemOperand<"printf256mem">;
+}
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i32mem_TC : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
+ PrintMethod = "print_pcrel_imm" in {
+def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
+
+def offset8 : Operand<i64>;
+def offset16 : Operand<i64>;
+def offset32 : Operand<i64>;
+def offset64 : Operand<i64>;
+
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be -1ULL), and "0xFFFF" (-1 truncated to 16 bits).
+
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
+}
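To make the documented ranges concrete, here is a standalone check equivalent to the ImmSExti16i8 ranges listed above; it is an illustration only (fitsImmSExti16i8 is a hypothetical helper, not how the assembler matcher is actually written).

#include <cstdint>

// Illustrative only: which 64-bit parsed values qualify as ImmSExti16i8,
// per the ranges documented above.
static bool fitsImmSExti16i8(uint64_t Imm) {
  if (Imm <= 0x7fULL)                        // [0, 0x7F]
    return true;
  if (Imm >= 0xff80ULL && Imm <= 0xffffULL)  // negatives already truncated to 16 bits
    return true;
  return Imm >= 0xffffffffffffff80ULL;       // negatives written as 64-bit values
}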
+
+// A couple of more-descriptive operand definitions.
+// 16 bits, but only 8 bits are significant.
+def i16i8imm : Operand<i16> {
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+// 32 bits, but only 8 bits are significant.
+def i32i8imm : Operand<i32> {
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 64 bits, but only 32 bits are significant, and those bits are treated as
+// being PC-relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ []>;
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasCMov : Predicate<"Subtarget->hasCMov()">;
+def NoCMov : Predicate<"!Subtarget->hasCMov()">;
+
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
+def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
+def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
+ "TM.getCodeModel() != CodeModel::Kernel">;
+def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
+ "TM.getCodeModel() == CodeModel::Kernel">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
+def OptForSize : Predicate<"OptForSize">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
+
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+//
+
+// Nop
+let neverHasSideEffects = 1 in {
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
+ "nop{w}\t$zero", []>, TB, OpSize;
+ def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero),
+ "nop{l}\t$zero", []>, TB;
+}
+
+
+// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
+
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
+
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize;
+def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
+ OpSize;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let mayStore = 1 in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize;
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
+ OpSize;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
+
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", []>, OpSize;
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>;
+
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
+
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB,
+ OpSize;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
+} // Defs = [EFLAGS]
+
+
+// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+}
+
+// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
+let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
+let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
+
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
+
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
+/// absolute 32-bit memory offset (a direct address, not PC-relative). These
+/// are only valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{b}\t{$src, %al|%al, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
+ "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ Requires<[In32BitMode]>;
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ Requires<[In32BitMode]>;
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ Requires<[In32BitMode]>;
+
+// FIXME: These definitions are utterly broken.
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
+
+let isCodeGenOnly = 1 in {
+def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+}
+
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayLoad = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+
+
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
+
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Make these instructions disassembly
+// only for now.
+
+def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]
+ []
+ >, OpSize, TB, Requires<[FastBTMem]>;
+def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
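The register/memory asymmetry noted in the comment above can be sketched in C++; this is an assumption about the ISA behaviour for illustration (btRegForm16 / btMemForm16 are hypothetical helpers, not LLVM code): with a register destination the bit index is reduced modulo the operand width, while with a memory destination the upper bits of the index also select which word is accessed.

#include <cstdint>

// Illustrative sketch only.
static bool btRegForm16(uint16_t Val, int64_t Index) {
  return (Val >> (Index & 15)) & 1;           // index taken modulo 16
}
static bool btMemForm16(const uint16_t *Base, int64_t Index) {
  const uint16_t *Word = Base + (Index >> 4); // upper bits pick the word
  return (*Word >> (Index & 15)) & 1;         // may touch memory far from Base
}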
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
+ ]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
+ ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
+
+def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // Defs = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+
+// Atomic swap. These are just normal xchg instructions, but since a memory
+// operand is referenced, the processor asserts the LOCK signal implicitly and
+// atomicity is ensured.
+let Constraints = "$val = $dst" in {
+def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+
+def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
+ "xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
+}
+
+def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
+ "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
+ "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|%rax, $src}", []>;
+
+
+
+def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+}
+
+def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
+def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+}
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
+def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
+ "cmpxchg8b\t$dst", []>, TB;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB;
+
+
+
+// Lock instruction prefix
+def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
+// Repeat string operation instruction prefixes
+// These use the DF flag in the EFLAGS register to inc or dec ECX
+let Defs = [ECX], Uses = [ECX,EFLAGS] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
+}
+
+
+// String manipulation instructions
+def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
+def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
+def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
+
+def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
+def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
+def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
+
+
+// Flag instructions
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
+
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
+
+// Table lookup instructions
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
+
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
+
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Subsystems.
+//===----------------------------------------------------------------------===//
+
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
+
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
+
+// SIMD support (SSE, MMX and AVX)
+include "X86InstrFragmentsSIMD.td"
+
+// FMA - Fused Multiply-Add support (requires FMA)
+include "X86InstrFMA.td"
+
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
+//===----------------------------------------------------------------------===//
+// Assembler Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl; it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However, "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similarly for "pop %bx".
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a set of
+/// MnemonicAliases that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
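+// As a concrete reading of the expansion above: with Prefix="j" and Suffix=""
+// the Z member becomes MnemonicAlias<"jz", "je">, and with Prefix="cmov" and
+// Suffix="w" it becomes MnemonicAlias<"cmovzw", "cmovew">.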
+
+
+//===----------------------------------------------------------------------===//
+// Assembler Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// Force a suffix-less mov between a segment register and memory to prefer the
+// 'l' form of the move. All segment/mem forms are equivalent; this one has the
+// shortest encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl (as in "rep; movsd").
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 000000000000..b2d9fca97b02
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,454 @@
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ }
+}
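+// For reference, these multiclasses are instantiated further down in this
+// file, e.g.
+//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+// which expands into a register form (MMX_PADDBirr) and a memory form
+// (MMX_PADDBirm).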
+
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
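+// These two conversion multiclasses are instantiated below for the MMX/SSE
+// conversions (cvtps2pi, cvtpd2pi, cvttps2pi, cvttpd2pi, cvtpi2pd, cvtpi2ps);
+// each instantiation yields a register (irr) and a memory (irm) form.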
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS Instruction
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
+ [(int_x86_mmx_emms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ []>;
+
+// These are 64-bit moves, but since the OS X assembler doesn't
+// recognize a register-to-register movq, we write them as
+// movd.
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (bitconvert GR64:$src))]>;
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (X86vzmovl (x86mmx
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
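+// Note: the AddedComplexity values above bias instruction selection toward
+// these zero-extending movd patterns over the plain MMX_MOVD64rr/rm patterns
+// whenever both would match.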
+
+// Arithmetic Instructions
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// -- Unpack Instructions
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
+
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))]>;
+let Constraints = "$src1 = $dst" in {
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
+ (v2i64 (MOVQI2PQIrm addr:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
+ (v2i64 (MOVDI2PDIrm addr:$src))>;
+
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
+
+// Misc.
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+
+// 64-bit bit convert.
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
+ (MMX_MOVFR642Qrr FR64:$src)>;
+
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 000000000000..fe11d776804c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,5865 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 Instructions Classes
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
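+// For illustration (hypothetical instantiation): with OpcodeStr "addss", the
+// !if(Is2Addr, ...) above yields the two-operand SSE form
+//   "addss\t{$src2, $dst|$dst, $src2}"
+// when Is2Addr is 1, and the three-operand AVX-style form
+//   "addss\t{$src2, $src1, $dst|$dst, $src1, $src2}"
+// when it is 0.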
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, mem_cpat:$src2))]>;
+}
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroes the upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and as just mentioned, we don't use movss/movsd for copies.
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+let AddedComplexity = 15 in {
+// Extract the low 32-bit value from one vector and insert it into another.
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+
+let AddedComplexity = 20 in {
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+}
+
+// Store scalar value to memory.
+def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX;
+
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
+}
+
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
+
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPS/D load and store
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
+
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+ PatFrag mov_frag, string base_opc,
+ string asm_opr> {
+ def PSrm : PI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ !strconcat(base_opc, "s", asm_opr),
+ [(set RC:$dst,
+ (mov_frag RC:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+ SSEPackedSingle>, TB;
+
+ def PDrm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ !strconcat(base_opc, "d", asm_opr),
+ [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))],
+ SSEPackedDouble>, TB, OpSize;
+}
+
+let AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
+
+let AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
+
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
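+// Note on naming (TableGen mechanics, for reference): instantiating this
+// multiclass via e.g. "defm CVTTSS2SI : sse12_cvt_s<...>" below yields two
+// records, CVTTSS2SIrr (register source) and CVTTSS2SIrm (memory source,
+// with the load folded through ld_frag).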
+
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ []>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+}
+
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing an rxx
+// register, but the same isn't true when only using memory operands.
+// Provide the alternative assembly "l" and "q" forms to address this
+// explicitly where appropriate.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W;
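+// For illustration (hedged sketch, AT&T syntax): with a register source the
+// operand width is implied by the register, e.g.
+//   vcvtsi2ss  %eax, %xmm1, %xmm0     32-bit source, implied by %eax
+//   vcvtsi2ss  %rax, %xmm1, %xmm0     64-bit source, implied by %rax
+// but with a memory source the width is ambiguous, so the explicit forms help:
+//   vcvtsi2ssl (%rdi), %xmm1, %xmm0
+//   vcvtsi2ssq (%rdi), %xmm1, %xmm0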
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// prefixes. Get rid of this hack or rename the intrinsics; there are several
+// instructions that only match with the intrinsic form, so why create
+// duplicates just to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ f32mem, load, "cvtss2si{q}">, XS, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
+
+
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+
+let Constraints = "$src1 = $dst" in {
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
+}
+
+/// SSE 1 Only
+
+// Aliases for intrinsics
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si">, XD, VEX, VEX_W;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttsd2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttsd2si{q}">, XD, REX_W;
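+// Background note (hedged, general x86 semantics): the "tt" variants truncate
+// toward zero, matching a C float-to-int cast, while the plain forms round
+// according to the current MXCSR rounding mode, e.g.
+//   cvttss2si %xmm0, %eax     (int)x, truncated
+//   cvtss2si  %xmm0, %eax     rounded per MXCSR (round-to-nearest by default)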
+
+let Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+}
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
+}
+
+/// SSE 2 Only
+
+// Convert scalar double to scalar single
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
+
+// Convert scalar single to scalar double
+// SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+ Requires<[HasAVX]>;
+
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2, OptForSize]>;
+
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
+
+// Convert doubleword to packed single/double fp
+// SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// FIXME: why is the non-intrinsic version described as SSE3?
+// SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+// Convert packed single/double fp to doubleword with truncation
+// SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
+def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
+
+
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
+
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
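+// For illustration (hedged, AT&T syntax): the destination is an xmm register
+// in both cases, so a memory source leaves the width ambiguous:
+//   vcvttpd2dqx (%rdi), %xmm0    two doubles from a 128-bit load
+//   vcvttpd2dqy (%rdi), %xmm0    four doubles from a 256-bit load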
+
+// Convert packed single to packed double
+let Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+// Convert packed double to packed single
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+
+
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsic matching to use patterns like the
+// ones below whenever possible, to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ string asm, string asm_alt> {
+ let isAsmParserOnly = 1 in {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
+ let mayLoad = 1 in
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
+}
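+// For illustration (hedged, AT&T syntax): the two asm forms above accept
+//   cmpltss %xmm1, %xmm0        condition code folded into the mnemonic
+//   cmpss $1, %xmm1, %xmm0      same comparison via an explicit immediate
+// where immediate 1 is the "lt" predicate.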
+
+let neverHasSideEffects = 1 in {
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XS, VEX_4V;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+def CMPSSrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
+def CMPSSrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
+def CMPSDrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
+def CMPSDrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
+}
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
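+// Usage sketch (hedged, AT&T syntax): these compares only write EFLAGS and
+// are consumed through flag users, e.g.
+//   ucomiss %xmm1, %xmm0
+//   ja      .Lgt                taken when xmm0 > xmm1 and the operands
+//                               compared ordered (label name is illustrative)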
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ let isAsmParserOnly = 1 in {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
+}
+
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
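+// For illustration (hedged): the shuffle immediate is four 2-bit selectors.
+// For shufps the low two result elements come from $src1 via imm[1:0] and
+// imm[3:2], and the high two from $src2 via imm[5:4] and imm[7:6], e.g.
+//   shufps $0x4e, %xmm1, %xmm0   picks elements 2,3 of %xmm0 and 0,1 of %xmm1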
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
+}
+
+// Mask creation
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
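+// Usage sketch (hedged, AT&T syntax): the sign-mask instructions pack the
+// element sign bits into the low bits of a GPR, e.g.
+//   movmskps %xmm0, %eax     eax[3:0] = sign bits of the four floats
+//   movmskpd %xmm0, %eax     eax[1:0] = sign bits of the two doubles
+// with the remaining destination bits cleared.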
+
+// X86fgetsign
+def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+}
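+// Illustrative note (hedged): materializing +0.0 this way emits a self-XOR
+// such as "pxor %xmm0, %xmm0", a recognized zeroing idiom that also breaks
+// any dependence on the register's previous value.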
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
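+// Illustrative sketch (hedged): these scalar-typed aliases are what lowered
+// fabs/fneg-style operations use, masking or flipping the sign bit, e.g.
+//   andps <abs-mask constant>, %xmm0     clear bit 31 (fabs)
+//   xorps <sign-bit constant>, %xmm0     flip bit 31  (fneg)
+// (the constant operands here are placeholders, not real symbols).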
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Pattern = []<dag> in {
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
+ }
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
+ }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
+///
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
+
+ defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
+}
+
+// AVX 256-bit packed logical ops forms
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+
+/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
+/// classes below
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, Is2Addr>, XD;
+}
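+// For illustration (hedged, AT&T syntax): in the scalar forms only the low
+// element is computed and the destination's upper elements pass through,
+// e.g.
+//   addss %xmm1, %xmm0      xmm0[0] += xmm1[0]; xmm0[127:32] unchanged
+// which is why the vector-typed intrinsic variants noted above cannot be
+// commuted.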
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ let mayLoad = 0 in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let mayLoad = 0 in {
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
+ SSEPackedSingle, Is2Addr>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
+ SSEPackedDouble, Is2Addr>, TB, OpSize;
+}
+
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd>,
+ basic_sse12_fp_binop_s_int<0x58, "add">;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub">;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_s_int<0x5E, "div">;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_s_int<0x5F, "max">,
+ basic_sse12_fp_binop_p_int<0x5F, "max">;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_s_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_int<0x5D, "min">;
+ }
+}
+
+/// Unop Arithmetic
+/// Like the binops above, unops come in both scalar and packed forms. We
+/// also have a special variant of the scalar form to represent the
+/// associated intrinsic operation. This form is unlike the plain scalar
+/// form, in that it takes an entire vector (instead of a scalar) and leaves
+/// the top elements undefined.
+///
+/// And we have a special variant form for a full-vector intrinsic form.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, XS, Requires<[HasAVX, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+ def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
+let Predicates = [HasAVX] in {
+ // Square root.
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ VEX_4V;
+
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+ VEX;
+
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
+ VEX_4V;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
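+
+// Illustrative refinement step, not part of this patch (a sketch assuming
+// the standard <xmmintrin.h> intrinsics): one Newton-Raphson iteration
+// raises the ~12-bit rcpps estimate to roughly single-precision accuracy.
+//   __m128 x0 = _mm_rcp_ps(a);                      // ~1/a, 12-bit estimate
+//   __m128 x1 = _mm_sub_ps(_mm_add_ps(x0, x0),      // x1 = x0*(2 - a*x0)
+//                          _mm_mul_ps(a, _mm_mul_ps(x0, x0)));
+// The rsqrtps estimate can be refined the same way with
+// x1 = 0.5*x0*(3 - a*x0*x0).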
+
+// There is no f64 version of the reciprocal approximation instructions.
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+}
+
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti{l}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti{q}\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+}
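+
+// Illustrative usage, not part of this patch (assumes the standard SSE/SSE2
+// intrinsic headers): non-temporal stores bypass the cache and are weakly
+// ordered, so they are typically followed by an sfence.
+//   void fill(float *dst /* 16-byte aligned */, __m128 v, int n) {
+//     for (int i = 0; i < n; i += 4)
+//       _mm_stream_ps(dst + i, v);                  // MOVNTPS
+//     _mm_sfence();                                 // order the NT stores
+//   }
+//   _mm_stream_si128((__m128i *)p16, x);            // MOVNTDQ
+//   _mm_stream_si32(ip, 42);                        // MOVNTI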
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
+
+// Alias instructions that map a zero vector to pxor / xorp* for SSE.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
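+
+// Illustrative usage, not part of this patch: this zeroing idiom is what
+// _mm_setzero_ps() / _mm_setzero_pd() / _mm_setzero_si128() normally
+// compile down to (xorps / xorpd / pxor of a register with itself).
+//   __m128  zf = _mm_setzero_ps();     // V_SET0PS
+//   __m128i zi = _mm_setzero_si128();  // V_SET0PI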
+
+// The same as above but for AVX. The 128-bit versions are the same, but
+// re-encoded. The 256-bit form has no PI version and does not need one:
+// on Sandy Bridge the register is set to zero at the rename stage without
+// using any execution unit, so SET0PSY and SET0PDY can be used for vector
+// int instructions without penalty.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
+ Requires<[HasAVX, OptForSpeed]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
+
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, Requires<[HasSSE2]>;
+}
+
+// Intrinsic forms of the MOVDQU store
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
+
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
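+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): the
+// aligned forms require a 16-byte-aligned address, the unaligned forms
+// accept any address.
+//   __m128i a = _mm_load_si128((const __m128i *)p16);  // MOVDQA, p16 aligned
+//   __m128i u = _mm_loadu_si128((const __m128i *)p);   // MOVDQU, any address
+//   _mm_storeu_si128((__m128i *)q, u);                 // MOVDQU store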
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
+}
+
+} // ExeDomain = SSEPackedInt
+
+// 128-bit Integer Arithmetic
+
+let Predicates = [HasAVX] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+// Intrinsic forms
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
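+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): the
+// saturating forms clamp instead of wrapping.
+//   __m128i a = _mm_set1_epi8((char)200), b = _mm_set1_epi8(100);
+//   __m128i w = _mm_add_epi8(a, b);    // PADDB: wraps, every byte = 44
+//   __m128i s = _mm_adds_epu8(a, b);   // PADDUSB: saturates, every byte = 255
+//   __m128i d = _mm_sad_epu8(a, b);    // PSADBW: sum of absolute byte
+//                                      //         differences per 64-bit half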
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>, VEX_4V;
+
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+}
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
+
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+}
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
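+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; x below
+// is an arbitrary __m128i): note that pslldq/psrldq shift the whole register
+// by a byte count, while the element shifts take a bit count.
+//   __m128i a = _mm_slli_epi32(x, 4);      // PSLLD:  each dword <<= 4 bits
+//   __m128i b = _mm_slli_si128(x, 4);      // PSLLDQ: whole register <<= 4 bytes
+//   __m128i c = _mm_andnot_si128(a, b);    // PANDN:  (~a) & b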
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
+
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
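+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; a, b,
+// x, y are arbitrary __m128i values): the compares produce all-ones in each
+// matching lane and zero otherwise, which works directly as a select mask.
+//   __m128i eq  = _mm_cmpeq_epi8(a, b);                // PCMPEQB
+//   __m128i sel = _mm_or_si128(_mm_and_si128(eq, x),   // lane-wise eq ? x : y
+//                              _mm_andnot_si128(eq, y));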
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
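+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): pack
+// narrows two word vectors into one byte vector with saturation.
+//   __m128i w0 = _mm_set1_epi16(300), w1 = _mm_set1_epi16(-5);
+//   __m128i sb = _mm_packs_epi16(w0, w1);   // PACKSSWB: 300 -> 127, -5 -> -5
+//   __m128i ub = _mm_packus_epi16(w0, w1);  // PACKUSWB: 300 -> 255, -5 -> 0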
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
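+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i): the immediate selects which source element feeds each
+// result lane.
+//   __m128i bc  = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));   // PSHUFD:
+//                                                   // broadcast dword 0
+//   __m128i rev = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));   // reverse dwords
+//   __m128i hw  = _mm_shufflehi_epi16(v, _MM_SHUFFLE(2, 3, 0, 1)); // PSHUFHW:
+//                                                   // permute upper four words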
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (unp_frag VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+}
+
+} // ExeDomain = SSEPackedInt
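+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i): a common idiom is widening bytes to words by
+// interleaving with zero.
+//   __m128i z  = _mm_setzero_si128();
+//   __m128i lo = _mm_unpacklo_epi8(v, z);   // PUNPCKLBW: low 8 bytes -> 8 words
+//   __m128i hi = _mm_unpackhi_epi8(v, z);   // PUNPCKHBW: high 8 bytes -> 8 words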
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Extract
+let Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, OpSize, VEX;
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>;
+
+// Insert
+let Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+
+} // ExeDomain = SSEPackedInt
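+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; v is an
+// arbitrary __m128i):
+//   __m128i v2 = _mm_insert_epi16(v, 0x1234, 3);   // PINSRW: replace word 3
+//   int     w  = _mm_extract_epi16(v2, 3);         // PEXTRW: w == 0x1234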
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+} // ExeDomain = SSEPackedInt
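+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>): a
+// typical pattern is testing 16 bytes at once for a match.
+//   __m128i eq   = _mm_cmpeq_epi8(block, _mm_set1_epi8(ch));
+//   int     mask = _mm_movemask_epi8(eq);  // PMOVMSKB: bit i set if byte i matched
+//   if (mask != 0) { /* first match at index __builtin_ctz(mask) */ }
+// (block and ch above are placeholders for the data being scanned.)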
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
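+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>):
+// maskmovdqu stores only the bytes whose mask byte has its high bit set;
+// the destination address travels in (E/R)DI, which the intrinsic hides.
+//   _mm_maskmoveu_si128(data, mask, (char *)dst);   // MASKMOVDQU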
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+
+// Move Int Doubleword to Packed Double Int
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+
+
+// Move Int Doubleword to Single Scalar
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+// Move Packed Doubleword Int to Packed Double Int
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+// Move Scalar Single to Double Int
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+// movd / movq to XMM register zero-extends
+let AddedComplexity = 15 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
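+
+// Illustrative usage, not part of this patch (assumes <emmintrin.h>; x is an
+// int, lx a 64-bit integer):
+//   __m128i v = _mm_cvtsi32_si128(x);    // MOVD GR32->XMM, upper 96 bits zeroed
+//   int     y = _mm_cvtsi128_si32(v);    // MOVD XMM->GR32, y == x
+//   __m128i q = _mm_cvtsi64_si128(lx);   // MOVQ GR64->XMM (x86-64 only)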
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Store / copy lower 64-bits of a XMM register.
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in {
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
+// the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+}
+
+// Instructions to match in the assembler
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with a GR64 destination, but encode it as a "movq".
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+
+// Instructions for the disassembler
+// xr = XMM register
+// xm = mem64
+
+let Predicates = [HasAVX] in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
+def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, XS;
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
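+// (The resulting encoding is the byte sequence F3 90, which pre-SSE2
+// processors simply execute as a nop.)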
+
+// Alias instruction that maps the all-ones vector to pcmpeqd for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true for memory operands. Provide additional
+// rr and rm assembly forms to address this explicitly.
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (rep_frag
+ VR128:$src, (undef))))]>;
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (rep_frag
+ (memopv4f32 addr:$src), (undef)))]>;
+}
+
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+ string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
+let Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
+ (undef))))]>;
+}
+
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+}
+
+let Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+// Move Unaligned Integer
+let Predicates = [HasAVX] in {
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// Several Move patterns
+let AddedComplexity = 5 in {
+def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+}
+
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
+}
+
+let Predicates = [HasAVX],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, TB, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, TB, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
+}
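+// For reference, addsubps computes { a0-b0, a1+b1, a2-b2, a3+b3 } and
+// addsubpd computes { a0-b0, a1+b1 } for sources a and b.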
+
+//===---------------------------------------------------------------------===//
+// SSE3 Instructions
+//===---------------------------------------------------------------------===//
+
+// Horizontal ops
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd>;
+}
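+// For reference, haddps produces { a0+a1, a2+a3, b0+b1, b2+b3 } and hsubps
+// produces { a0-a1, a2-a3, b0-b1, b2-b3 } for sources a and b.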
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Absolute Instructions
+//===---------------------------------------------------------------------===//
+
+
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>;
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
+
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ int_x86_ssse3_psign_d_128, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+}
+
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>;
+}
+
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
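+// Note: pshufb selects each result byte from the first operand using the low
+// four bits of the corresponding mask byte, or zeroes it when the mask byte's
+// high bit is set.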
+
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
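+// Note: psign{b,w,d} negates, keeps, or zeroes each element of the first
+// operand depending on whether the matching element of the second operand is
+// negative, positive, or zero.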
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PALIGN : ssse3_palign<"palignr">;
+
+let AddedComplexity = 5 in {
+def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
+ (PALIGNR128rr VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_palign_imm VR128:$src3))>,
+ Requires<[HasSSSE3]>;
+}
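+// For reference, palignr concatenates its two operands (first operand in the
+// upper bytes), shifts the 32-byte result right by the immediate number of
+// bytes, and keeps the low 16 bytes.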
+
+//===---------------------------------------------------------------------===//
+// SSSE3 Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
+
+//===---------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after DAG
+// combine. Since these loads aren't folded into the fextend, we have to
+// match them explicitly here.
+let Predicates = [HasSSE2] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>;
+
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+let Predicates = [HasAVX] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
+ addr:$src)>;
+
+// bit_convert
+let Predicates = [HasXMMInt] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+}
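+// The bitconvert patterns above generate no code; they simply retype the
+// value already sitting in the XMM/YMM register.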
+
+// Move scalar to XMM, zero-extended: movd to an XMM register zero-extends
+// the upper elements.
+let AddedComplexity = 15 in {
+// Zero a VR128, then do a MOVS{S|D} into the lower bits.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0PS)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0PI)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+}
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// Special unary SHUFPSrri case.
+def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+let AddedComplexity = 5 in
+def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Unary v4f32 shuffle with PSHUF* in order to fold a load.
+def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+
+// Special binary v4i32 shuffle cases with SHUFPS.
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
+ (UNPCKLPSrr VR128:$src, VR128:$src)>;
+def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>;
+def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>;
+def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>;
+}
+
+// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
+ (UNPCKHPSrr VR128:$src, VR128:$src)>;
+def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>;
+def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>;
+def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+}
+
+// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+
+let AddedComplexity = 15 in {
+// Setting the lowest element in the vector.
+def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
+ Requires<[HasSSE2]>;
+}
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+
+// Set lowest element and zero upper elements.
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// vector -> vector casts
+def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Move with Sign/Zero Extend
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
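+// For reference, pmovsxbw sign-extends the low eight bytes of the source to
+// eight words, while pmovzxbw zero-extends them; the wd and dq forms widen
+// words to doublewords and doublewords to quadwords in the same way.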
+
+// Common patterns involving scalar load.
+def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+  // Expecting an i16 load any-extended to an i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in {
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
+
+// insertps has a few different modes; the first two below are optimized
+// inserts that won't zero arbitrary elements in the destination vector. The
+// next one matches the intrinsic and may zero arbitrary elements in the
+// target vector.
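+// For reference, the immediate's bits 7:6 select the source element (register
+// form only), bits 5:4 select the destination element, and bits 3:0 form a
+// zero mask applied to the result.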
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
+}
+
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
+  // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
+  // Vector intrinsic operation, reg
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
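+
+// The round* immediate selects the rounding behaviour: bits [1:0] give the
+// rounding mode (00 = nearest, 01 = down, 10 = up, 11 = truncate), bit 2, when
+// set, uses the mode from MXCSR.RC instead, and bit 3 suppresses the precision
+// exception. E.g. "roundps $0x1, %xmm1, %xmm0" is effectively a packed floor.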
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction: we lower to this in X86ISelLowering, primarily from the
+// Intel intrinsic that corresponds to it.
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
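+
+// ptest does not modify its destination; it only sets flags: ZF is set when
+// (src1 AND src2) is all zeros, and CF is set when ((NOT src1) AND src2) is
+// all zeros.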
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+
+
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
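+
+// pmulld multiplies the packed 32-bit elements and keeps the low 32 bits of
+// each product, which is why it maps directly onto the generic "mul" node at
+// type v4i32.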
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
+let Predicates = [HasAVX] in {
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
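+
+// The non-AVX blendv forms use XMM0 as an implicit mask: the most significant
+// bit of each element (each byte for pblendvb) of XMM0 selects whether the
+// corresponding result element comes from $src2 (bit set) or from $src1/$dst.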
+
+let Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize;
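+
+// movntdqa is a non-temporal (streaming) aligned 16-byte load, mainly useful
+// for reading from write-combining memory without polluting the caches.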
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+let Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
+ 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+}
+
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+ def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+ def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+}
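+
+// For all of the pcmpXstrX forms the imm8 is a control byte: roughly, bits
+// [1:0] select the element format (unsigned/signed bytes or words), bits [3:2]
+// the aggregation operation, bits [5:4] the polarity, and bit 6 whether the
+// *_m forms produce a bit or byte mask (or, for the *_i forms, whether the
+// least or most significant matching index is returned in ECX).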
+
+// Packed Compare Explicit Length Strings, Return Mask
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+}
+
+let Predicates = [HasAVX],
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
+ VEX;
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+// Packed Compare Explicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let Predicates = [HasAVX] in {
+defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
+ VEX;
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
+// CRC intrinsic instructions.
+// This set of instructions is rm only; the only difference between the forms
+// is the size of r and m.
+let Constraints = "$src1 = $dst" in {
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i16mem:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR16:$src2),
+ "crc32{w} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
+ OpSize;
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
+ (load addr:$src2)))]>;
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "crc32{l} \t{$src2, $src1|$src1, $src2}",
+ [(set GR32:$dst,
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i8mem:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR8:$src2),
+ "crc32{b} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
+ REX_W;
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
+ (load addr:$src2)))]>,
+ REX_W;
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "crc32{q} \t{$src2, $src1|$src1, $src2}",
+ [(set GR64:$dst,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
+ REX_W;
+}
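+
+// crc32 accumulates a CRC-32C checksum (the Castagnoli polynomial, as used by
+// iSCSI and SCTP, not the CRC-32 of zlib) of the source operand into the
+// destination register; the forms differ only in the source width.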
+
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
+
+// Perform One Round of an AES Encryption/Decryption Flow
+let Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+}
+
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
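+
+// aesenc performs one full AES encryption round (ShiftRows, SubBytes,
+// MixColumns, then XOR with the round key in $src2); aesenclast is the final
+// round, which omits MixColumns. aesdec/aesdeclast are the corresponding
+// rounds of the inverse cipher, using the key schedule produced by aesimc.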
+
+// Perform the AES InvMixColumn Transformation
+let Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize, VEX;
+}
+def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize;
+def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "aesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize;
+
+// AES Round Key Generation Assist
+let Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize, VEX;
+}
+def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize;
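+
+// aeskeygenassist computes the SubWord/RotWord terms used during AES key
+// expansion; the immediate supplies the round constant (RCON) XORed into the
+// result.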
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Carry-less Multiplication instructions
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+}
+
+// AVX Carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+
+// Load from memory and broadcast to all elements of the destination operand
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
+
+// Insert packed floating-point values
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Extract packed floating-point values
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+// Conditional SIMD Packed Loads and Stores
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
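+
+// For vmaskmovps/pd the first source operand is a mask: the sign bit of each
+// element selects whether the corresponding element is loaded or stored;
+// unselected elements are zeroed on loads and left untouched on stores.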
+
+// Permute Floating-Point Values
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Zero All YMM registers
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
+
+// Zero Upper bits of YMM registers
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
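+
+// vzeroupper avoids the SSE/AVX transition penalty by clearing the upper 128
+// bits of every YMM register before legacy SSE code runs; vzeroall clears the
+// registers entirely.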
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles will be translated into target-specific nodes and matched
+// directly by the patterns below (which can be changed along the way). The
+// AVX versions of some, but not all, of them are described here; more should
+// come in the near future.
+
+// Shuffle with PSHUFD instruction, folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but since PSHUFD requires
+// SSE2 it is unclear how this ever worked. Anyway, the pattern will remain
+// here until we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of an X86Movss there should be an X86Movlps here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 000000000000..8278568184ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
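+// Illustrative sketch only (not part of the original change): the FIXME above
+// is asking for the rCL/ri/r1 forms below to be generated from a multiclass
+// instead of being spelled out per width, along these (hypothetical) lines:
+//   multiclass ShiftRot32<bits<8> opcCL, bits<8> opcImm, Format f,
+//                         string mnem, SDNode node> {
+//     let Uses = [CL] in
+//     def rCL : I<opcCL, f, (outs GR32:$dst), (ins GR32:$src1),
+//                 !strconcat(mnem, "{l}\t{%cl, $dst|$dst, CL}"),
+//                 [(set GR32:$dst, (node GR32:$src1, CL))]>;
+//     def ri  : Ii8<opcImm, f, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+//                   !strconcat(mnem, "{l}\t{$src2, $dst|$dst, $src2}"),
+//                   [(set GR32:$dst, (node GR32:$src1, (i8 imm:$src2)))]>;
+//   }
+// Every name in the sketch is hypothetical; the hand-written definitions below
+// are what this change actually adds.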
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src1 = $dst"
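+
+// Illustrative note (not part of the original change): the shift-by-one Pat
+// the NOTE above refers to is expected to live with the generic compiler
+// patterns and to look roughly like
+//   def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+// ADD32rr is an assumed name from the arithmetic instruction definitions.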
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
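+// Informal reminder of the semantics (added for clarity, not authoritative):
+// for an N-bit operand, "shld dst, src, cnt" computes
+//   dst = (dst << cnt) | (src >> (N - cnt))
+// and "shrd dst, src, cnt" computes
+//   dst = (dst >> cnt) | (src << (N - cnt))
+// i.e. the destination is shifted while the vacated bits are filled from the
+// source, which is why these generalize the rotates above (shld/shrd of a
+// register with itself is a rotate).
+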
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+}
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 000000000000..31de878343ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,429 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
+
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t{$src}", []>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t%es", []>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t%fs", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t%gs", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t%fs", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t%gs", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t%fs", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t%fs", []>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t%gs", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t%gs", []>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", []>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", []>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+def : InstAlias<"xstorerng", (XSTORE)>;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 000000000000..daf61e4625d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,54 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
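+// F3 0F C7 /6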
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 000000000000..3f88fa69d0ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,574 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Valgrind.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
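+  // The patched region is 5 bytes: the JMP opcode plus a 32-bit displacement.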
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// For ELF targets, use a .size and .type directive, to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way to disable the use of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers, which support
+// only a subset of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+    // Align the stack on a 16-byte boundary. RSP might only be 8-byte
+    // aligned if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+  // No inline assembler support on this platform. The routine is in an
+  // external file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ sub esp, 16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call X86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
+ }
+#endif
+}
+
+/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" {
+#if !(defined (X86_64_JIT) && defined(_MSC_VER))
+  // The following function is called only from this translation unit,
+  // unless we are under 64-bit Windows with MSC, where there is
+  // no support for inline assembly.
+static
+#endif
+void LLVM_ATTRIBUTE_USED
+X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
+
+  // The call instruction should have pushed the return address onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#if defined (X86_64_JIT)
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+    // 0xCE byte (interrupt) dead, so the marker doesn't affect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
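+      // Rewrite "callq *%r10" into "jmpq *%r10": change the ModRM reg field
+      // from 2 (/2 = CALL) to 4 (/4 = JMP); mod=3 and rm=2 still select %r10
+      // together with the existing REX.B prefix.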
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+ sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+}
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ if (Subtarget->hasSSE1())
+ return X86CompilationCallback_SSE;
+#endif
+
+ return X86CompilationCallback;
+}
+
+X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ useGOT = 0;
+ TLSOffset = 0;
+}
+
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ const unsigned Alignment = 8;
+ uint8_t Buffer[8];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
+ MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
+#else
+ const unsigned Alignment = 4;
+ uint8_t Buffer[4];
+ uint8_t *Cur = Buffer;
+ MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
+#endif
+ return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
+}
+
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
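+  // 10-byte movabs + 3-byte indirect call/jmp + 1 marker byte = 14 bytes,
+  // aligned to 4 bytes.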
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+    JCE.emitByte(2 | (4 << 3) | (3 << 6)); // ModRM: mod=3, /4 (JMP), rm=2 (%r10)
+#else
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+ return Result;
+ }
+
+#if defined (X86_64_JIT)
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+  JCE.emitByte(2 | (2 << 3) | (3 << 6)); // ModRM: mod=3, /2 (CALL), rm=2 (%r10)
+#else
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
+#endif
+
+ // This used to use 0xCD, but that value is used by JITMemoryManager to
+ // initialize the buffer with garbage, which means it may follow a
+ // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
+ return Result;
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specific basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ llvm_unreachable("Cannot allocate thread local storage on this arch!");
+ return 0;
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.h b/contrib/llvm/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 000000000000..238420c236b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,81 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+ class X86Subtarget;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+    /// getStubLayout - Returns the size and alignment of the largest call
+    /// stub on X86.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specific basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
new file mode 100644
index 000000000000..ce8ef495c001
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -0,0 +1,1044 @@
+//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86FixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class X86MCCodeEmitter : public MCCodeEmitter {
+ X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+public:
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
+ }
+
+ ~X86MCCodeEmitter() {}
+
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
+ static unsigned GetX86RegNum(const MCOperand &MO) {
+ return X86RegisterInfo::getX86RegNum(MO.getReg());
+ }
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7 and the difference between the 2 groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, for example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+ (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitImmediate(const MCOperand &Disp,
+ unsigned ImmSize, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset = 0) const;
+
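+  // A ModRM byte packs mod in bits [7:6], reg/opcode in bits [5:3], and
+  // r/m in bits [2:0].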
+ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+ }
+
+ void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+ unsigned &CurByte, raw_ostream &OS) const {
+ EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+ }
+
+ void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ unsigned &CurByte, raw_ostream &OS) const {
+ // SIB byte is in the same format as the ModRMByte.
+ EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
+ }
+
+
+ void EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ raw_ostream &OS) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
+/// in an instruction with the specified TSFlags.
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
+ unsigned Size = X86II::getSizeOfImm(TSFlags);
+ bool isPCRel = X86II::isImmPCRel(TSFlags);
+
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386, as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
+
+void X86MCCodeEmitter::
+EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
+ const MCExpr *Expr = NULL;
+ if (DispOp.isImm()) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
+ }
+
+  // If the immediate starts with _GLOBAL_OFFSET_TABLE_, use the special GOT
+  // relocation and bias the expression by this fixup's offset within the
+  // instruction.
+ if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
+
+ // If the fixup is pc-relative, we need to bias the value to be relative to
+ // the start of the field, not the end of the field.
+ if (FixupKind == FK_PCRel_4 ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
+ ImmOffset -= 4;
+ if (FixupKind == FK_PCRel_2)
+ ImmOffset -= 2;
+ if (FixupKind == FK_PCRel_1)
+ ImmOffset -= 1;
+
+ if (ImmOffset)
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Ctx);
+
+  // Emit a symbolic constant as a fixup followed by 'Size' bytes of zeros.
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ EmitConstant(0, Size, CurByte, OS);
+}
+
+void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte,
+ raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const{
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+ unsigned BaseReg = Base.getReg();
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+
+ unsigned FixupKind = X86::reloc_riprel_4byte;
+
+ // movq loads are handled with a special relocation form which allows the
+ // linker to eliminate some loads for GOT references which end up in the
+ // same linkage unit.
+ if (MI.getOpcode() == X86::MOV64rm)
+ FixupKind = X86::reloc_riprel_4byte_movq_load;
+
+ // rip-relative addressing is actually relative to the *next* instruction.
+ // Since an immediate can follow the mod/rm byte for an instruction, this
+ // means that we need to bias the immediate field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
+ // expression to emit.
+ int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+
+ EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
+ CurByte, OS, Fixups, -ImmSize);
+ return;
+ }
+
+ unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+
+ // Determine whether a SIB byte is needed.
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+ // 2-7) and absolute references.
+
+ if (// The SIB byte must be used if there is an index register.
+ IndexReg.getReg() == 0 &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ (!is64BitMode() || BaseReg != 0)) {
+
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ return;
+ }
+
+    // If the base is not EBP/ESP and there is no displacement, use simple
+    // indirect register encoding; this handles addresses like [EAX]. The
+    // encoding for [EBP] with no displacement means [disp32], so we handle it
+    // by emitting a displacement of 0 below.
+ if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+
+ // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
+ if (Disp.isImm() && isDisp8(Disp.getImm())) {
+ EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ return;
+ }
+
+ // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
+ EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+ return;
+ }
+
+  // We need a SIB byte, so output the ModR/M byte first.
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (!Disp.isImm()) {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp32 = true;
+ } else if (Disp.getImm() == 0 &&
+ // Base reg can't be anything that ends up with '5' as the base
+ // reg, it is the magic [*] nomenclature that indicates no base.
+ BaseRegNo != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ } else if (isDisp8(Disp.getImm())) {
+ // Emit the disp8 encoding.
+ EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding.
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ }
+
+ // Calculate what the SS field value should be...
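+  // Scale factors 1, 2, 4, and 8 map to SS values 0, 1, 2, and 3; the ~0
+  // entries are invalid scales.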
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base, see Intel
+ // Manual 2A, table 2-7. The displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
+ IndexRegNo = 4;
+ EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ } else {
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = GetX86RegNum(IndexReg);
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8)
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ else if (ForceDisp32 || Disp.getImm() != 0)
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
+}
+
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = false;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+  // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ //
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::TF: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+ // Set the vector length to 256-bit if YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
+ bool IsDestMem = false;
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem:
+ IsDestMem = true;
+ // The important info for the VEX prefix is never beyond the address
+ // registers. Don't check beyond that.
+ NumOps = CurOp = X86::AddrNumOperands;
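+    // FALL THROUGH into the shared register scan below.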
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcReg:
+ if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
+ CurOp++;
+ }
+
+    // To check only the operands that come before the memory address
+    // operands, restart the search from the beginning.
+ if (IsDestMem)
+ CurOp = 0;
+
+    // If the last register should be encoded in the immediate field, do not
+    // use any bit of the VEX prefix for it; ignore it here.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
+ NumOps--;
+
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_B = 0x0;
+ if (!VEX_B && MO.isReg() &&
+ ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_X = 0x0;
+ }
+ break;
+ default: // MRMDestReg, MRM0r-MRM7r, RawFrm
+ if (!MI.getNumOperands())
+ break;
+
+ if (MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ CurOp++;
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && !HasVEX_4V &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ }
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
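+  // The last byte packs pp in bits [1:0], L in bit 2, and vvvv in bits [6:3];
+  // bit 7 holds R (2-byte form) or W (3-byte form).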
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
+/// DetermineREXPrefix - Determine if the MCInst has to be encoded with an
+/// X86-64 REX prefix, which specifies 1) 64-bit instructions, 2) non-default
+/// operand size, and 3) use of X86-64 extended registers.
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
+ const MCInstrDesc &Desc) {
+ unsigned REX = 0;
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0) return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ // FIXME: MCInst should explicitize the two-addrness.
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
+ // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ break;
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMSrcReg:
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 0; // set REX.B
+ }
+ break;
+ case X86II::MRMSrcMem: {
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && MI.getOperand(e).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ REX |= 1 << 2; // set REX.R
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
+ Bit++;
+ }
+ }
+ break;
+ }
+ default:
+ if (MI.getOperand(0).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ REX |= 1 << 0; // set REX.B
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ REX |= 1 << 2; // set REX.R
+ }
+ break;
+ }
+ return REX;
+}
+
+/// EmitSegmentOverridePrefix - Emit the segment override opcode prefix as needed.
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: assert(0 && "Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: assert(0 && "Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ EmitByte(0x64, CurByte, OS);
+ break;
+ case X86II::GS:
+ EmitByte(0x65, CurByte, OS);
+ break;
+ }
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present.
+/// If not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const MCInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // Emit the repeat opcode prefix as needed.
+ if ((TSFlags & X86II::Op0Mask) == X86II::REP)
+ EmitByte(0xF3, CurByte, OS);
+
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
+ // Emit the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ EmitByte(0x66, CurByte, OS);
+
+ bool Need0FPrefix = false;
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ case X86II::REP: break; // already handled.
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
+ Need0FPrefix = true;
+ break;
+ case X86II::TF: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XS: // F3 0F
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
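+  // The x87 escape opcodes (D8-DF) below are emitted as-is; no 0F follows.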
+ case X86II::D8: EmitByte(0xD8, CurByte, OS); break;
+ case X86II::D9: EmitByte(0xD9, CurByte, OS); break;
+ case X86II::DA: EmitByte(0xDA, CurByte, OS); break;
+ case X86II::DB: EmitByte(0xDB, CurByte, OS); break;
+ case X86II::DC: EmitByte(0xDC, CurByte, OS); break;
+ case X86II::DD: EmitByte(0xDD, CurByte, OS); break;
+ case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
+ case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
+ }
+
+ // Handle REX prefix.
+ // FIXME: Can this come before F2 etc to simplify emission?
+ if (is64BitMode()) {
+ if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
+ EmitByte(0x40 | REX, CurByte, OS);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ EmitByte(0x0F, CurByte, OS);
+
+ // FIXME: Pull this up into previous switch if REX can be moved earlier.
+ switch (TSFlags & X86II::Op0Mask) {
+ case X86II::TF: // F2 0F 38
+ case X86II::T8: // 0F 38
+ EmitByte(0x38, CurByte, OS);
+ break;
+ case X86II::TA: // 0F 3A
+ EmitByte(0x3A, CurByte, OS);
+ break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
+ }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
+ // If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+  // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
+ HasVEXPrefix = true;
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
+
+ unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
+ unsigned SrcRegNum = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
+ assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ case X86II::Pseudo:
+ assert(0 && "Pseudo instruction shouldn't be emitted");
+ case X86II::RawFrm:
+ EmitByte(BaseOpcode, CurByte, OS);
+ break;
+
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
+
+ case X86II::AddRegFrm:
+ EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ break;
+
+ case X86II::MRMDestReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp),
+ GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
+ CurOp += 2;
+ break;
+
+ case X86II::MRMDestMem:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitMemModRMByte(MI, CurOp,
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcReg:
+ EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += AddrOperands + 1;
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(CurOp++),
+ (TSFlags & X86II::FormMask)-X86II::MRM0r,
+ CurByte, OS);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
+ TSFlags, CurByte, OS, Fixups);
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRM_C1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC1, CurByte, OS);
+ break;
+ case X86II::MRM_C2:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC2, CurByte, OS);
+ break;
+ case X86II::MRM_C3:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC3, CurByte, OS);
+ break;
+ case X86II::MRM_C4:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC4, CurByte, OS);
+ break;
+ case X86II::MRM_C8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC8, CurByte, OS);
+ break;
+ case X86II::MRM_C9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC9, CurByte, OS);
+ break;
+ case X86II::MRM_E8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xE8, CurByte, OS);
+ break;
+ case X86II::MRM_F0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF0, CurByte, OS);
+ break;
+ case X86II::MRM_F8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF8, CurByte, OS);
+ break;
+ case X86II::MRM_F9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF9, CurByte, OS);
+ break;
+ case X86II::MRM_D0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD0, CurByte, OS);
+ break;
+ case X86II::MRM_D1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD1, CurByte, OS);
+ break;
+ }
+
+ // If there is a remaining operand, it must be a trailing immediate. Emit it
+ // according to the right size for the instruction.
+ if (CurOp != NumOps) {
+    // The last source register of a 4-operand AVX instruction is encoded
+    // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(CurOp++);
+ bool IsExtReg =
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+ RegNum |= GetX86RegNum(MO) << 4;
+ EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+ Fixups);
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
+ CurByte, OS, Fixups);
+ }
+ }
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
+
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
new file mode 100644
index 000000000000..e38533555534
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -0,0 +1,692 @@
+//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
+/// operand to an MCSymbol.
+MCSymbol *X86MCInstLower::
+GetSymbolFromOperand(const MachineOperand &MO) const {
+ assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+
+ SmallString<128> Name;
+
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol());
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+ MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+ isImplicitlyPrivate = true;
+
+ Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+  // If the target flags on the operand change the name of the symbol, do that
+  // before we return the symbol.
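+  // For example, a dllimport reference gets an "__imp_" prefix and a Darwin
+  // lazy-pointer reference gets a "$non_lazy_ptr" suffix.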
+ switch (MO.getTargetFlags()) {
+ default: break;
+ case X86II::MO_DLLIMPORT: {
+ // Handle dllimport linkage.
+ const char *Prefix = "__imp_";
+ Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix));
+ break;
+ }
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getHiddenGVStubEntry(Sym);
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+ case X86II::MO_DARWIN_STUB: {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI().getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = 0;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case X86II::MO_NO_FLAG: // No flag.
+ // These affect the name of the symbol, not any suffix.
+ case X86II::MO_DARWIN_NONLAZY:
+ case X86II::MO_DLLIMPORT:
+ case X86II::MO_DARWIN_STUB:
+ break;
+
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
+ case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+ case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
+ case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
+ case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+ case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+ case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
+ case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
+ case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
+ case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_PIC_BASE_OFFSET:
+ case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
+ case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
+ Ctx);
+ if (MO.isJTI() && MAI.hasSetDirective()) {
+ // If .set directive is supported, use it to reduce the number of
+ // relocations the assembler will generate for differences between
+ // local labels. This is only safe when the symbols are in the same
+ // section so we are restricting it to jumptable references.
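+      // For example (illustrative label names), rather than relocating the
+      // difference LJTI0_0-"L0$pb" directly, this emits
+      //   .set Ltmp0, LJTI0_0-"L0$pb"
+      // and references Ltmp0 instead.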
+ MCSymbol *Label = Ctx.CreateTempSymbol();
+ AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ Expr = MCSymbolRefExpr::Create(Label, Ctx);
+ }
+ break;
+ }
+
+ if (Expr == 0)
+ Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+
+
+static void lower_subreg32(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg32.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (Reg != 0)
+ MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32));
+}
+
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
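+  // For example, a base register recorded as %eax is rewritten to %rax so the
+  // printed/encoded LEA uses a legal 64-bit addressing mode.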
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
+/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8.
+static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ lower_subreg32(&OutMI, 0);
+}
+/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R
+static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
+ OutMI.setOpcode(NewOpc);
+ OutMI.addOperand(OutMI.getOperand(0));
+ OutMI.addOperand(OutMI.getOperand(0));
+}
+
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions
+/// with a short fixed-register form.
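+///
+/// For example, "addl $42, %eax" (ADD32ri) can be re-encoded as ADD32i32,
+/// which uses the one-byte opcode 0x05 and needs no ModRM byte.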
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
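+/// For example, a load of %eax from an absolute 32-bit address can use the
+/// short "moffs" form (opcode 0xA1) instead of the longer ModRM encoding.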
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+ Inst.getOperand(AddrOp).isImm())&&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO,
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+
+ // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
+ switch (OutMI.getOpcode()) {
+ case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+    // LEA carries a segment-register operand, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
+ break;
+ case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
+ case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
+ case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
+ case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
+ case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
+ case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
+ case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
+ case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break;
+ case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break;
+ case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break;
+ case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
+ case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
+ case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+
+ case X86::MOV16r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+ case X86::MOV64r0:
+ LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
+ LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
+ break;
+
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: assert(0 && "Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
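+  // (The "_DB" pseudos are ADDs whose operands are known to have no set bits
+  // in common; in that case x + y == x | y, so an OR encodes the same result.)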
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
+ // The assembler backend wants to see branches in their small form and relax
+ // them to their large form. The JIT can only handle the large form because
+ // it does not do relaxation. For now, translate the large form to the
+ // small one here.
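+  // For example, JMP_4 (E9, rel32) becomes JMP_1 (EB, rel8); the assembler
+  // relaxes it back to the 4-byte form only if the target is out of rel8
+  // range.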
+ case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
+ case X86::JO_4: OutMI.setOpcode(X86::JO_1); break;
+ case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
+ case X86::JB_4: OutMI.setOpcode(X86::JB_1); break;
+ case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
+ case X86::JE_4: OutMI.setOpcode(X86::JE_1); break;
+ case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
+ case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
+ case X86::JA_4: OutMI.setOpcode(X86::JA_1); break;
+ case X86::JS_4: OutMI.setOpcode(X86::JS_1); break;
+ case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
+ case X86::JP_4: OutMI.setOpcode(X86::JP_1); break;
+ case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
+ case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
+ case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
+ case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
+ case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+ }
+}
+
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
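+  // In 64-bit mode this produces the canonical general-dynamic TLS sequence,
+  // roughly:
+  //   data16 leaq sym@TLSGD(%rip), %rdi
+  //   data16 data16 rex64 call __tls_get_addr@PLT
+  // The extra prefixes pad the sequence to the fixed size the linker expects
+  // when it relaxes the TLS access.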
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
+
+void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ std::string TmpStr;
+ raw_string_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
+ case X86::MOVPC32r: {
+ MCInst TmpInst;
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ TmpInst.setOpcode(X86::CALLpcrel32);
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
+ OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+
+ // popl $reg
+ TmpInst.setOpcode(X86::POP32r);
+ TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+ DotExpr, OutContext);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.h b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
new file mode 100644
index 000000000000..021007239128
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
@@ -0,0 +1,52 @@
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_MCINSTLOWER_H
+#define X86_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class MachineModuleInfoMachO;
+ class MachineOperand;
+ class Mangler;
+ class TargetMachine;
+ class X86AsmPrinter;
+
+/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
+ MCContext &Ctx;
+ Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
+ X86AsmPrinter &AsmPrinter;
+public:
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MachineModuleInfoMachO &getMachOMMI() const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 000000000000..37110382379e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,554 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Object/MachOFormat.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+ void RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ if (Writer->is64Bit())
+ RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ else
+ RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // See <reloc.h>.
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ int64_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ Value = Target.getConstant();
+
+ if (IsPCRel) {
+    // Compensate for the relocation offset: Darwin x86_64 relocations only
+    // have the addend, and appear to have attempted to define it to be the
+    // actual expression addend without the PCrel bias. However, instructions
+    // with data following the relocation are not accounted for (see the
+    // comment below regarding SIGNED{1,2,4}), so it isn't exactly that either.
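+    // For example, a rel32 call or jump fixup has Log2Size == 2, so the
+    // compensation added here is 4 bytes.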
+ Value += 1LL << Log2Size;
+ }
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = macho::RIT_X86_64_Unsigned;
+ Index = 0;
+
+ // FIXME: I believe this is broken, I don't think the linker can understand
+ // it. I think it would require a local relocation, but I'm not sure if that
+ // would work either. The official way to get an absolute PCrel relocation
+ // is to use an absolute symbol (which we don't support yet).
+ if (IsPCRel) {
+ IsExtern = 1;
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else if (Target.getSymB()) { // A - B + constant
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+ // Neither symbol can be modified.
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported relocation of modified symbol");
+
+ // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+ // implement most of these correctly.
+ if (IsPCRel)
+ report_fatal_error("unsupported pc-relative relocation of difference");
+
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where the
+ // section has only temporary symbols and thus the symbols don't have base
+ // symbols. This is encoded using the section ordinal and non-extern
+ // relocation entries.
+
+    // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+    // single SIGNED relocation); reject it for now, except for the case where
+    // neither symbol has a base, i.e. A_Base and B_Base are both NULL.
+ if (A_Base == B_Base && A_Base)
+ report_fatal_error("unsupported relocation with identical base");
+
+ Value += Writer->getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
+ } else {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
+ // x86_64 almost always uses external relocations, except when there is no
+ // symbol to use as a base address (a local symbol with no preceding
+ // non-local symbol).
+ if (Base) {
+ Index = Base->getIndex();
+ IsExtern = 1;
+
+ // Add the local offset, if needed.
+ if (Base != &SD)
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ Value += Writer->getSymbolAddress(&SD, Layout);
+
+ if (IsPCRel)
+ Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ Writer->getSectionAddressMap());
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ }
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+ if (IsPCRel) {
+ if (IsRIPRel) {
+ if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+ // rewrite the movq to an leaq at link time if the symbol ends up in
+ // the same linkage unit.
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ Type = macho::RIT_X86_64_GOTLoad;
+ else
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = macho::RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ report_fatal_error("unsupported symbol modifier in relocation");
+ } else {
+ Type = macho::RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and the
+ // linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the final
+ // offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
+ }
+ }
+ } else {
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in branch "
+ "relocation");
+
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else {
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+ // case all we do is set the PCrel bit in the relocation entry; this is
+ // used with exception handling, for example. The source is required to
+ // include any necessary offset directly.
+ Type = macho::RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in relocation");
+ else
+ Type = macho::RIT_X86_64_Unsigned;
+ }
+ }
+
+ // x86_64 always writes custom values into the fixups.
+ FixedValue = Value;
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ //
+ // Note that there is no longer any semantic difference between these two
+    // relocation types from the linker's point of view; this is done solely
+    // for pedantic compatibility with 'as'.
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !is64Bit() &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in pic mode and it'll be a
+ // subtraction from the picbase. For 32-bit pic the addend is the difference
+ // between the picbase and the next address. For 32-bit static the addend is
+ // zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+ Target.getConstant());
+ FixedValue += 1ULL << Log2Size;
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB())
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a scattered
+ // relocation entry.
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ Type = macho::RIT_Vanilla;
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ Type = macho::RIT_Vanilla;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 000000000000..06043ecd3f30
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,135 @@
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+  /// ForceFramePointer - True if the function is required to use a frame
+  /// pointer for reasons other than containing dynamic allocation or having
+  /// FP elimination turned off. For example, the Cygwin main function contains
+  /// stack-pointer re-alignment code, which requires a frame pointer.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+  /// BytesToPopOnReturn - Number of bytes the function pops on return (in
+  /// addition to the space used by the return address).
+  /// Used on Windows for stdcall & fastcall name decoration.
+ unsigned BytesToPopOnReturn;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+  /// TailCallReturnAddrDelta - The number of bytes by which the return address
+  /// stack slot is moved as a result of tail call optimization.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+  /// GlobalBaseReg - Keeps track of the virtual register initialized for use
+  /// as the global base register. This is used in some PIC relocation models.
+ unsigned GlobalBaseReg;
+
+  /// ReserveFP - Whether the function should reserve the frame pointer during
+  /// register allocation, even if no frame pointer ends up being used.
+ bool ReserveFP;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// RegSaveFrameIndex - X86-64 vararg func register save area.
+ int RegSaveFrameIndex;
+ /// VarArgsGPOffset - X86-64 vararg func int reg offset.
+ unsigned VarArgsGPOffset;
+ /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
+ unsigned VarArgsFPOffset;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+                             GlobalBaseReg(0),
+                             ReserveFP(false),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0),
+ ReserveFP(false),
+ VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0),
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+ bool getReserveFP() const { return ReserveFP; }
+ void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+
+ int getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; }
+
+ unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; }
+ void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; }
+
+ unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
+ void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 000000000000..f2faf59367a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,973 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
+using namespace llvm;
+
+cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(), TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
+ if (!Subtarget->is64Bit()) {
+ if (Subtarget->isTargetDarwin()) {
+ if (isEH)
+ return DWARFFlavour::X86_32_DarwinEH;
+ else
+ return DWARFFlavour::X86_32_Generic;
+ } else if (Subtarget->isTargetCygMing()) {
+      // Unsupported for now; just a quick fallback.
+ return DWARFFlavour::X86_32_Generic;
+ } else {
+ return DWARFFlavour::X86_32_Generic;
+ }
+ }
+ return DWARFFlavour::X86_64;
+}
+
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
+}
+
+/// getLLVMRegNum - This function maps DWARF register numbers to LLVM registers.
+int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
+}
+
+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
+ case X86::EBX: case X86::RBX: return 1;
+ case X86::ECX: case X86::R12: return 2;
+ case X86::EDX: case X86::R13: return 3;
+ case X86::EDI: case X86::R14: return 4;
+ case X86::ESI: case X86::R15: return 5;
+ case X86::EBP: case X86::RBP: return 6;
+ }
+
+ return -1;
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ int reg = getX86RegNum(i);
+ switch (i) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ reg += 8;
+ }
+ return reg;
+}
+
+/// getX86RegNum - This function maps LLVM register identifiers to their X86
+/// specific numbering, which is used in various places encoding instructions.
+unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
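+ // Only the low 3 bits of the hardware encoding are returned; the x86-64
+ // extended registers share these values with their legacy counterparts, and
+ // the extra REX bit is emitted separately.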
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
+ return 7;
+
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
+
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
+ default:
+ assert(isVirtualRegister(RegNo) && "Unknown physical register!");
+ llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
+ return 0;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned SubIdx) const {
+ switch (SubIdx) {
+ default: return 0;
+ case X86::sub_8bit:
+ if (B == &X86::GR8RegClass) {
+ if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
+ A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::GR8_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
+ return &X86::GR16_NOREXRegClass;
+ else if (A == &X86::GR16_ABCDRegClass)
+ return &X86::GR16_ABCDRegClass;
+ }
+ break;
+ case X86::sub_8bit_hi:
+ if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
+ switch (A->getSize()) {
+ case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
+ case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
+ case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
+ default: return 0;
+ }
+ break;
+ case X86::sub_16bit:
+ if (B == &X86::GR16RegClass) {
+ if (A->getSize() == 4 || A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR16_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
+ A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_ABCDRegClass;
+ } else if (B == &X86::GR16_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
+ A == &X86::GR32_NOSPRegClass)
+ return &X86::GR32_NOREXRegClass;
+ else if (A == &X86::GR32_ABCDRegClass)
+ return &X86::GR32_ABCDRegClass;
+ }
+ break;
+ case X86::sub_32bit:
+ if (B == &X86::GR32RegClass) {
+ if (A->getSize() == 8)
+ return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
+ } else if (B == &X86::GR32_ABCDRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
+ A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass ||
+ A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
+ A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ }
+ break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
+ }
+ return 0;
+}
+
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
+ case 0: // Normal GPRs.
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64RegClass;
+ return &X86::GR32RegClass;
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_NOSPRegClass;
+ return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
+ }
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
+const unsigned *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+ bool ghcCall = false;
+
+ if (MF) {
+ callsEHReturn = MF->getMMI().callsEHReturn();
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
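+ // Functions using the GHC calling convention preserve no registers across
+ // calls, so the GHC list below contains only the null terminator.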
+ static const unsigned GhcCalleeSavedRegs[] = {
+ 0
+ };
+
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64EHRet[] = {
+ X86::RAX, X86::RDX, X86::RBX, X86::R12,
+ X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegsWin64[] = {
+ X86::RBX, X86::RBP, X86::RDI, X86::RSI,
+ X86::R12, X86::R13, X86::R14, X86::R15,
+ X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
+ X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
+ X86::XMM14, X86::XMM15, 0
+ };
+
+ if (ghcCall) {
+ return GhcCalleeSavedRegs;
+ } else if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegsWin64;
+ else
+ return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
+ } else {
+ return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ }
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (TFI->hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though their
+ // super-registers are the old 32-bit registers.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
+ for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
+ const unsigned GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ Reserved.set(Reg);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+ }
+ }
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return (RealignStack &&
+ !MFI->hasVarSizedObjects());
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas.
+ // FIXME: It's more complicated than this...
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported");
+
+ // If we've requested that we force-align the stack, do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
+ FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
+ return true;
+ }
+ return false;
+}
+
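+// Pick the SUB (or ADD, below) opcode with the smallest immediate encoding
+// that can hold Imm: the ri8 forms take a sign-extended 8-bit immediate,
+// otherwise the 32-bit immediate forms are used.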
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool reserveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reserveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackup instruction into 'add ESP, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
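+ // For example, with a 16-byte stack alignment an Amount of 20 rounds up to 32.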
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call; there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->getDesc().isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
+void
+X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ unsigned BasePtr;
+
+ unsigned Opc = MI.getOpcode();
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else if (AfterFPPop)
+ BasePtr = StackPtr;
+ else
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+
+ // This must be part of a four operand memory reference. Replace the
+ // FrameIndex with the base register chosen above.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ int FIOffset;
+ if (AfterFPPop) {
+ // Tail call jmp happens after FP is popped.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
+ } else
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
+ if (MI.getOperand(i+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
+ MI.getOperand(i+3).setOffset(Offset);
+ }
+}
+
+unsigned X86RegisterInfo::getRARegister() const {
+ return Is64Bit ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
+}
+
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+
+namespace {
+ struct MSAH : public MachineFunctionPass {
+ static char ID;
+ MSAH() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned StackAlignment = TFI->getStackAlignment();
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If yes, there is a possibility that a vector register
+ // will be spilled and thus require dynamic stack realignment.
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
+ FuncInfo->setReserveFP(true);
+ return true;
+ }
+ }
+ // Nothing to do
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Check";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MSAH::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 000000000000..a12eb1297f7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,157 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bit?
+ ///
+ bool Is64Bit;
+
+ /// IsWin64 - Is the target one of the Win64 flavours?
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// getX86RegNum - Returns the native X86 register number for the given LLVM
+ /// register identifier.
+ static unsigned getX86RegNum(unsigned RegNo);
+
+ /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
+ /// (created by TableGen) for target dependencies.
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
+
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+
+ /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// class A so that each register in it has a sub-register of the
+ /// specified sub-register index which is in the specified register class B.
+ virtual const TargetRegisterClass *
+ getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B, unsigned Idx) const;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns NULL if it is possible to copy
+ /// between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses and
+ /// should be considered unavailable at all times, e.g. SP, RA. This is used by
+ /// the register scavenger to determine which registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool canRealignStack(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ // FIXME: Move to FrameInfo
+ unsigned getSlotSize() const { return SlotSize; }
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 000000000000..203722a66162
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,467 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+ // AL aliases AX if we tell it that AX aliases AL (for example).
+
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64, the
+ // second is for EH on X86-32/Darwin, and the third is the 'generic' one
+ // (X86-32/Linux and debug information on X86-32/Darwin).
+
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"al">;
+ def DL : Register<"dl">;
+ def CL : Register<"cl">;
+ def BL : Register<"bl">;
+
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
+ def SIL : Register<"sil">;
+ def DIL : Register<"dil">;
+ def BPL : Register<"bpl">;
+ def SPL : Register<"spl">;
+ def R8B : Register<"r8b">;
+ def R9B : Register<"r9b">;
+ def R10B : Register<"r10b">;
+ def R11B : Register<"r11b">;
+ def R12B : Register<"r12b">;
+ def R13B : Register<"r13b">;
+ def R14B : Register<"r14b">;
+ def R15B : Register<"r15b">;
+ }
+
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
+ def AH : Register<"ah">;
+ def DH : Register<"dh">;
+ def CH : Register<"ch">;
+ def BH : Register<"bh">;
+
+ // 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
+ def SI : RegisterWithSubRegs<"si", [SIL]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>;
+ }
+ def IP : Register<"ip">;
+
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
+ }
+ // 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+ // X86-64 only, requires REX
+ let CostPerUse = 1 in {
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
+ }}
+
+ // 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
+ def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
+ def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
+ def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
+ def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
+ def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
+ def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
+ def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
+ def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+ // These also require REX.
+ let CostPerUse = 1 in {
+ def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
+ def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
+ def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
+ def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
+ def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
+ def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
+ def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
+ def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
+ def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }}
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
+ def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
+ def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
+ def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
+ def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
+ def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
+ def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
+ def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"fp0">;
+ def FP1 : Register<"fp1">;
+ def FP2 : Register<"fp2">;
+ def FP3 : Register<"fp3">;
+ def FP4 : Register<"fp4">;
+ def FP5 : Register<"fp5">;
+ def FP6 : Register<"fp6">;
+
+ // XMM Registers, used by the various SSE instruction set extensions.
+ // The sub_ss and sub_sd subregs are the same registers with another regclass.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
+ def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
+ def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
+ def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
+ def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
+ def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
+ def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
+ def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
+ def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
+
+ // X86-64 only
+ let CostPerUse = 1 in {
+ def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
+ def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
+ def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
+ def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
+ def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
+ def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
+ def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
+ def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }}
+
+ // YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
+ }
+
+ class STRegister<string Name, list<Register> A> : Register<Name> {
+ let Aliases = A;
+ }
+
+ // Floating point stack registers. These don't map one-to-one to the FP
+ // pseudo registers, but we still mark them as aliasing FP registers. That
+ // way both kinds can be live without exceeding the stack depth. ST registers
+ // are only live around inline assembly.
+ def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
+
+ // Status flags register
+ def EFLAGS : Register<"flags">;
+
+ // Segment registers
+ def CS : Register<"cs">;
+ def DS : Register<"ds">;
+ def SS : Register<"ss">;
+ def ES : Register<"es">;
+ def FS : Register<"fs">;
+ def GS : Register<"gs">;
+
+ // Debug registers
+ def DR0 : Register<"dr0">;
+ def DR1 : Register<"dr1">;
+ def DR2 : Register<"dr2">;
+ def DR3 : Register<"dr3">;
+ def DR4 : Register<"dr4">;
+ def DR5 : Register<"dr5">;
+ def DR6 : Register<"dr6">;
+ def DR7 : Register<"dr7">;
+
+ // Control registers
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
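+// (As a base register, R12 encodes like RSP and needs a SIB byte, while R13
+// encodes like RBP and needs an explicit displacement byte.)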
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
+}
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
+// RIP isn't really a register and it can't be used anywhere except in an
+// address, but it doesn't cause trouble.
+def GR64 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
+}
+
+// Segment registers for use by MOV instructions (and others) that have a
+// segment register as one operand. Always contain a 16-bit segment
+// descriptor.
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
+
+// Debug registers.
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
+
+// Control registers.
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
+}
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
+}
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
+}
+
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
+
+// GR8_NOREX - GR8 registers which do not require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX - GR16 registers which do not require a REX prefix.
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ (add AX, CX, DX, SI, DI, BX, BP, SP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
+}
+
+// GR32_NOSP - GR32 registers except ESP.
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
+// GR64_NOSP - GR64 registers except RSP (and RIP).
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
+}
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ (and GR32_NOREX, GR32_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
+}
+
+// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
+ (and GR64_NOREX, GR64_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX_NOSP sub_32bit)];
+}
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
+}
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
+
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
+ let isAllocatable = 0;
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+}
+
+def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
+ (sequence "YMM%u", 0, 15)> {
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Relocations.h b/contrib/llvm/lib/Target/X86/X86Relocations.h
new file mode 100644
index 000000000000..990962dc4173
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,52 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit. The relocations are handled by the JIT
+ /// or object-code emitters; this is transparent to the x86 code emitter,
+ /// but the JIT and object-code emitters treat them differently.
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..02754f9ae503
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -0,0 +1,259 @@
+//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-selectiondag-info"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If the destination is in a segment-relative address space, use the
+  // default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+  // If not DWORD aligned or the size is more than the threshold, call the
+  // library. The libc version is likely to be faster for these cases. It can
+  // use the address value and run-time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+    if (const char *bzeroEntry = (V && V->isNullValue()) ?
+                                     Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
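+    // Compute the number of element-sized stores and the number of leftover
+    // tail bytes, which are written with a smaller memset further below.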
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
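+  // The rep;stos idiom expects the element count in RCX/ECX and the
+  // destination pointer in RDI/EDI.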
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
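+  // Note: TwoRepStos is never set to true above, so only the BytesLeft path
+  // below is taken to write any remaining tail bytes.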
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ }
+
+ // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+  // If not DWORD aligned, it is more efficient to call the library. However,
+  // if calling the library is not allowed (AlwaysInline), then soldier on, as
+  // the code generated here is better than the long load-store sequence we
+  // would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+  // If either the source or destination is in a segment-relative address
+  // space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
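+  // The rep;movs idiom expects the element count in RCX/ECX, the destination
+  // pointer in RDI/EDI and the source pointer in RSI/ESI.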
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
new file mode 100644
index 000000000000..d1d66fe76e94
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -0,0 +1,56 @@
+//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SELECTIONDAGINFO_H
+#define X86SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
+class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
+public:
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ ~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 000000000000..5e6c659e5393
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,323 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86InstrInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+
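+// Pull in the tablegen-generated parts of X86GenSubtargetInfo (the target
+// description tables and the subtarget constructor).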
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
+/// ClassifyGlobalReference - Classify a global variable reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+  // DLLImport only exists on Windows; it is implemented as a load from a
+  // DLLIMPORT stub.
+ if (GV->hasDLLImportLinkage())
+ return X86II::MO_DLLIMPORT;
+
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+  // load from a stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
+ if (isTargetDarwin()) {
+      // If symbol visibility is hidden, the extra load is not needed if the
+      // target is x86-64 or the symbol is definitely defined in the current
+      // translation unit.
+ if (GV->hasDefaultVisibility() &&
+ (isDecl || GV->isWeakForLinker()))
+ return X86II::MO_GOTPCREL;
+ } else if (!isTargetWin64()) {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
+      // An extra load is needed for all externally visible symbols.
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
+ }
+
+ return X86II::MO_NO_FLAG;
+ }
+
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+    // An extra load is needed for all externally visible symbols.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
+ }
+
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (isDecl || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
+ }
+
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
+
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
+
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (!isDecl && !GV->isWeakForLinker())
+ return X86II::MO_NO_FLAG;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY;
+
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
+
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
+}
+
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists
+/// on the current subtarget and it is considered preferable to memset with
+/// zero passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
+ return "__bzero";
+
+ return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (In64BitMode)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+ // For x86 out-of-order targets, back-schedule address computations so
+ // that loads and stores aren't blocked.
+ // This value was chosen arbitrarily.
+ return 200;
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
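+  // CPUID leaf 0 fills text.c with the 12-byte CPU vendor ID string, which is
+  // compared against "GenuineIntel" / "AuthenticAMD" below.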
+ if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+ if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX);
+ if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1);
+ if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2);
+ if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3);
+ if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);
+ if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);
+ if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL);
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3);
+ HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT);
+ HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES);
+
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
+ // If it's Nehalem, unaligned memory access is fast.
+ if (Family == 15 && Model == 26) {
+ IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
+
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if (IsAMD && ((ECX >> 6) & 0x1)) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if (IsAMD && ((ECX >> 16) & 0x1)) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , PICStyle(PICStyles::None)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasCMov(false)
+ , HasX86_64(false)
+ , HasPOPCNT(false)
+ , HasSSE4A(false)
+ , HasAVX(false)
+ , HasAES(false)
+ , HasCLMUL(false)
+ , HasFMA3(false)
+ , HasFMA4(false)
+ , IsBTMemSlow(false)
+ , IsUAMemFast(false)
+ , HasVectorUAMem(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MaxInlineSizeThreshold(128)
+ , TargetTriple(TT)
+ , In64BitMode(is64Bit) {
+ // Determine default and user specified characteristics
+ if (!FS.empty() || !CPU.empty()) {
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
+
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+
+ if (!HasAVX && X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
+ }
+
+  // It's important to keep the MCSubtargetInfo feature bits in sync with the
+  // target data structure, which is shared with the MC code emitter, etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
+
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n");
+ assert((!In64BitMode || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (StackAlignOverride)
+ stackAlignment = StackAlignOverride;
+ else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || In64BitMode)
+ stackAlignment = 16;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 000000000000..6d22027b7aa8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,259 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CallingConv.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+class TargetMachine;
+
+/// PICStyles - The X86 backend supports a number of different styles of PIC.
+///
+namespace PICStyles {
+enum Style {
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
+};
+}
+
+class X86Subtarget : public X86GenSubtargetInfo {
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasCMov - True if this processor has conditional move instructions
+ /// (generally pentium pro+).
+ bool HasCMov;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// HasAVX - Target has AVX instructions
+ bool HasAVX;
+
+ /// HasAES - Target has AES instructions
+ bool HasAES;
+
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
+ /// HasFMA3 - Target has 3-operand fused multiply-add
+ bool HasFMA3;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
+  /// stackAlignment - The minimum alignment known to hold for the stack frame
+  /// on entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+private:
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+  /// stack frame on entry to the function and which must be maintained by
+  /// every function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return In64BitMode; }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasCMov() const { return HasCMov; }
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
+ bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
+ bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
+ bool hasFMA3() const { return HasFMA3; }
+ bool hasFMA4() const { return HasFMA4; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
+
+ // ELF is a reasonably sane default and the only other X86 targets we
+ // support are Darwin and Windows. Just use "not those".
+ bool isTargetELF() const {
+ return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
+ bool isTargetCygMing() const {
+ return isTargetMingw() || isTargetCygwin();
+ }
+
+ /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
+ bool isTargetCOFF() const {
+ return isTargetMingw() || isTargetCygwin() || isTargetWindows();
+ }
+
+ bool isTargetWin64() const {
+ return In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ }
+
+ bool isTargetWin32() const {
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+
+ bool isPICStyleStubPIC() const {
+ return PICStyle == PICStyles::StubPIC;
+ }
+
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+ PICStyle == PICStyles::StubPIC; }
+
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
+
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+ /// to immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable to memset with
+  /// zero passed as the second argument. Otherwise it returns null.
+ const char *getBZeroEntry() const;
+
+ /// getSpecialAddressLatency - For targets where it is beneficial to
+ /// backschedule instructions that compute addresses, return a value
+ /// indicating the number of scheduling cycles of backscheduling that
+ /// should be attempted.
+ unsigned getSpecialAddressLatency() const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 000000000000..9cab0e089098
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,245 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeX86Target() {
+ // Register the target.
+ RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
+ RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+}
+
+
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, CPU, FS, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : X86TargetMachine(T, TT, CPU, FS, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
+}
+
+/// X86TargetMachine ctor - Create an X86 target.
+///
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ FrameLowering(*this, Subtarget),
+ ELFWriterInfo(is64Bit, true) {
+ DefRelocModel = getRelocationModel();
+
+ // If no relocation model was picked, default as appropriate for the target.
+ if (getRelocationModel() == Reloc::Default) {
+    // Darwin defaults to PIC in 64-bit mode and dynamic-no-pic in 32-bit mode.
+    // Win64 requires rip-relative addressing, so we force it to PIC. Otherwise
+    // we use the static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+
+ assert(getRelocationModel() != Reloc::Default &&
+ "Relocation mode not picked");
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+  // compile in -static mode; on x86-64 we use PIC.
+ if (getRelocationModel() == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ setRelocationModel(Reloc::PIC_);
+ else if (!Subtarget.isTargetDarwin())
+ setRelocationModel(Reloc::Static);
+ }
+
+ // If we are on Darwin, disallow static relocation model in X86-64 mode, since
+ // the Mach-O file format doesn't support it.
+ if (getRelocationModel() == Reloc::Static &&
+ Subtarget.isTargetDarwin() &&
+ is64Bit)
+ setRelocationModel(Reloc::PIC_);
+
+ // Determine the PICStyle based on the target selected.
+ if (getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ } else if (Subtarget.isTargetCygMing()) {
+ Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.isTargetDarwin()) {
+ if (getRelocationModel() == Reloc::PIC_)
+ Subtarget.setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(getRelocationModel() == Reloc::DynamicNoPIC);
+ Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (Subtarget.isTargetELF()) {
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+
+ // Finally, if we have "none" as our PIC style, force to static mode.
+ if (Subtarget.getPICStyle() == PICStyles::None)
+ setRelocationModel(Reloc::Static);
+
+  // Default to the hard-float ABI.
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, OptLevel));
+
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!Subtarget.is64Bit())
+ PM.add(createGlobalBaseRegPass());
+
+ return false;
+}
+
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86MaxStackAlignmentHeuristicPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
+ setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyles::None);
+ }
+
+
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
+
+void X86TargetMachine::setCodeModelForStatic() {
+
+ if (getCodeModel() != CodeModel::Default) return;
+
+ // For static codegen, if we're not already set, use Small codegen.
+ setCodeModel(CodeModel::Small);
+}
+
+
+void X86TargetMachine::setCodeModelForJIT() {
+
+ if (getCodeModel() != CodeModel::Default) return;
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ if (Subtarget.is64Bit())
+ setCodeModel(CodeModel::Large);
+ else
+ setCodeModel(CodeModel::Small);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 000000000000..885334a365fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,135 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+private:
+ // We have specific defaults for X86.
+ virtual void setCodeModelForJIT();
+ virtual void setCodeModelForStatic();
+
+public:
+ X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ llvm_unreachable("getSelectionDAGInfo not implemented");
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_32TargetMachine(const Target &T, const std::string &M,
+ const std::string &CPU, const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
+public:
+ X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
new file mode 100644
index 000000000000..1231798297ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -0,0 +1,121 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetObjectFile.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+using namespace dwarf;
+
+const MCExpr *X8664_MachoTargetObjectFile::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+ }
+
+ return TargetLoweringObjectFileMachO::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small || Model == CodeModel::Medium)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | (Model == CodeModel::Small ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+ if (CFI)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+
+ return DW_EH_PE_udata4;
+}
+
+unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
new file mode 100644
index 000000000000..e21b5bffd059
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -0,0 +1,60 @@
+//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// x86-64.
+ class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ public:
+ virtual const MCExpr *
+ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
+ };
+
+ class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
+ virtual unsigned getTTypeEncoding() const;
+ };
+
+ class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
+ virtual unsigned getTTypeEncoding() const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..c3b3dc9e647d
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMXCoreDesc
+ XCoreMCTargetDesc.cpp
+ XCoreMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..de61543bfe9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
new file mode 100644
index 000000000000..42ab1b31d57a
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -0,0 +1,29 @@
+//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCAsmInfo.h"
+using namespace llvm;
+
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
+ SupportsDebugInformation = true;
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+
+ // Debug
+ HasLEB128 = true;
+}
+
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
new file mode 100644
index 000000000000..840392263881
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class XCoreMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 000000000000..939d97c9d87c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitXCoreMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeXCoreMCAsmInfo() {
+ RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 000000000000..3cfc3764a62c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 000000000000..7aa8965c4ac6
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+ RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
new file mode 100644
index 000000000000..b8fb0cac319b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -0,0 +1,31 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class formatted_raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCore.td b/contrib/llvm/lib/Target/XCore/XCore.td
new file mode 100644
index 000000000000..38401895e634
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCore.td
@@ -0,0 +1,46 @@
+//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..1a43714d63b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,280 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class XCoreAsmPrinter : public AsmPrinter {
+ const XCoreSubtarget &Subtarget;
+ public:
+ explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ printInlineJT(MI, opNum, O, ".jmptable32");
+ }
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
+ virtual void EmitGlobalVariable(const GlobalVariable *GV);
+
+ void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
+ static const char *getRegisterName(unsigned RegNo);
+
+ void EmitFunctionEntryLabel();
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyEnd();
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType())) {
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ // FIXME: MCStreamerize.
+ OutStreamer.EmitRawText(StringRef(".globound"));
+ OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
+ OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
+ ".globound");
+ }
+ }
+}
+
+void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (!GV->hasInitializer() ||
+ EmitSpecialLLVMGlobal(GV))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
+ GVSym->getName());
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ report_fatal_error("AppendingLinkage is not supported by this target!");
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(GVSym, GV);
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::DLLImportLinkage:
+ llvm_unreachable("DLLImport linkage is not supported by this target!");
+ case GlobalValue::DLLExportLinkage:
+ llvm_unreachable("DLLExport linkage is not supported by this target!");
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+
+ EmitAlignment(Align > 2 ? Align : 2, GV);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
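+ // Thread-local globals are emulated: the initializer is replicated once per
+ // thread (MaxThreads copies) and each thread addresses its own copy.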
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
+ Twine(Size));
+ }
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i)
+ EmitGlobalConstant(C);
+ }
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+ // are padded to 32 bits.
+ if (Size < 4)
+ OutStreamer.EmitZeros(4 - Size, 0);
+
+ // Mark the end of the global
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+ // Emit function end directives
+ OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
+ ".function");
+}
+
+void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+ // Mark the start of the function
+ OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
+ ".function," + CurrentFnSym->getName());
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
+
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const std::string &directive) {
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol();
+ }
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+ default:
+ llvm_unreachable("not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ // Check for mov mnemonic
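+ // ADD_2rus with a zero immediate is equivalent to a register-to-register
+ // move, so print it as mov for readability.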
+ if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ else
+ printInstruction(MI, O);
+ OutStreamer.EmitRawText(O.str());
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+ RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 000000000000..b20d71f49cfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,36 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 values are returned in registers R0, R1, R2, R3.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 000000000000..057822074e54
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,387 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
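+// These predicates check whether a value fits the short and long immediate
+// ranges used by the XCore instruction encodings.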
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
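+ // LDWSP / STWSP take their offset in words, so the byte offset is scaled
+ // by 4 below.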
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
+ STI(sti) {
+ // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+ return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = &MF.getMMI();
+ const XCoreRegisterInfo *RegInfo =
+ static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ bool FP = hasFP(MF);
+ bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+
+ if (Nested) {
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+ }
+ bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
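+ // ENTSP both extends the stack and saves LR when the LR spill slot is at
+ // offset 0; otherwise EXTSP only extends the stack and LR is stored below.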
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold the epilogue into the return instruction.
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, TRI);
+ if (emitFrameMoves) {
+ MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+ XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const{
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+ RC, TRI);
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
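+ // The prologue / epilogue save and restore LR explicitly (via entsp / retsp
+ // or an explicit spill), so keep it out of the generic callee-saved spilling.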
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the epilogue / prologue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void XCoreFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 000000000000..7da19f0deb1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,59 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreSubtarget;
+
+ class XCoreFrameLowering: public TargetFrameLowering {
+ const XCoreSubtarget &STI;
+ public:
+ XCoreFrameLowering(const XCoreSubtarget &STI);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 000000000000..a8dd8479d45f
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,295 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ const XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDNode *N);
+ SDNode *SelectBRIND(SDNode *N);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into an
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+ return new XCoreDAGToDAGISel(TM);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the constant pool
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
+ // Transformation function: get the size of a mask
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
+ }
+ else if (!isUInt<16>(Val)) {
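+ // Constants that don't fit in 16 bits are materialized with a load from
+ // the constant pool.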
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+ return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ }
+ break;
+ }
+ case XCoreISD::LADD: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::LSUB: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::MACCU: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::MACCS: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::LMUL: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ case ISD::BRIND:
+ if (SDNode *ResNode = SelectBRIND(N))
+ return ResNode;
+ break;
+ // Other cases are autogenerated.
+ }
+ return SelectCode(N);
+}
+
+/// Given a chain, return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one node between Old and Chain, and that
+/// node must be a TokenFactor. Returns an empty SDValue if these conditions
+/// don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+ if (Chain == Old)
+ return New;
+ if (Chain->getOpcode() != ISD::TokenFactor)
+ return SDValue();
+ SmallVector<SDValue, 8> Ops;
+ bool found = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+ if (Chain->getOperand(i) == Old) {
+ Ops.push_back(New);
+ found = true;
+ } else {
+ Ops.push_back(Chain->getOperand(i));
+ }
+ }
+ if (!found)
+ return SDValue();
+ return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // (brind (int_xcore_checkevent (addr)))
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return 0;
+ unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::xcore_checkevent)
+ return 0;
+ SDValue nextAddr = Addr->getOperand(2);
+ SDValue CheckEventChainOut(Addr.getNode(), 1);
+ if (!CheckEventChainOut.use_empty()) {
+ // If the chain out of the checkevent intrinsic is an operand of the
+ // indirect branch or used in a TokenFactor which is the operand of the
+ // indirect branch then build a new chain which uses the chain coming into
+ // the checkevent intrinsic instead.
+ SDValue CheckEventChainIn = Addr->getOperand(0);
+ SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+ CheckEventChainIn);
+ if (!NewChain.getNode())
+ return 0;
+ Chain = NewChain;
+ }
+ // Enable events on the thread using setsr 1 and then disable them
+ // immediately after with clrsr 1. If any resources owned by the thread are
+ // ready, an event will be taken. If no resource is ready, we branch to the
+ // address which was the operand to the checkevent intrinsic.
+ SDValue constOne = getI32Imm(1);
+ SDValue Glue =
+ SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+ constOne, Chain), 0);
+ Glue =
+ SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+ constOne, Glue), 0);
+ if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+ nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+ return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+ nextAddr->getOperand(0), Glue);
+ }
+ return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 000000000000..6d040e052659
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,1608 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "XCoreTargetObjectFile.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ case XCoreISD::LADD : return "XCoreISD::LADD";
+ case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::LMUL : return "XCoreISD::LMUL";
+ case XCoreISD::MACCU : return "XCoreISD::MACCU";
+ case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM, new XCoreTargetObjectFile()),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(Sched::RegPressure);
+
+ // Use i32 for setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Custom expand misaligned loads / stores.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+ maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+ = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ADD);
+
+ setMinFunctionAlignment(1);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode())
+ {
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of a node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this!");
+ return;
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ if (isa<Function>(GV)) {
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ }
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(const Type *Ty) {
+ const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
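+ // The asm printer emits MaxThreads copies of the object, so
+ // base + getid() * size selects this thread's copy.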
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (! GVar) {
+ llvm_unreachable("Thread local object not a GlobalVariable?");
+ return SDValue();
+ }
+ const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+#ifndef NDEBUG
+ errs() << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+#endif
+ llvm_unreachable(0);
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
+
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ EVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+}
+
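+// Jump tables are emitted inline in the instruction stream (see printInlineJT
+// in the asm printer).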
+unsigned XCoreTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue XCoreTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
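+ // Larger tables use the 32-bit entry form (BR_JT32), so the index is
+ // doubled to step over the wider entries.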
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
+}
+
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+ int64_t &Offset)
+{
+ if (Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+ ConstantSDNode *CN = 0;
+ if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ return false;
+ }
+ int64_t off = CN->getSExtValue();
+ const SDValue &Base = Addr.getOperand(0);
+ const SDValue *Root = &Base;
+ if (Base.getOpcode() == ISD::ADD &&
+ Base.getOperand(1).getOpcode() == ISD::SHL) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+ .getOperand(1));
+ if (CN && (CN->getSExtValue() >= 2)) {
+ Root = &Base.getOperand(0);
+ }
+ }
+ if (isa<FrameIndexSDNode>(*Root)) {
+ // All frame indices are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+ Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+ // All dp / cp relative addresses are word aligned
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Unexpected extension type");
+ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ return SDValue();
+
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned load alone.
+ if (LD->getAlignment() >= ABIAlignment)
+ return SDValue();
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Base;
+ int64_t Offset;
+ if (!LD->isVolatile() &&
+ IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+ if (Offset % 4 == 0) {
+ // We've managed to infer better alignment information than the load
+ // already has. Use an aligned load.
+ //
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+ // Lower to
+ // ldw low, base[offset >> 2]
+ // ldw high, base[(offset >> 2) + 1]
+ // shr low_shifted, low, (offset & 0x3) * 8
+ // shl high_shifted, high, 32 - (offset & 0x3) * 8
+ // or result, low_shifted, high_shifted
+ SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+ SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(), false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(), false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
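+ // A load with 2-byte alignment is split into two halfword loads that are
+ // combined with a shift and an or.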
+ if (LD->getAlignment() == 2) {
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
+ LD->isVolatile(), LD->isNonTemporal(), 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
+ MVT::i16, LD->isVolatile(),
+ LD->isNonTemporal(), 2);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
+ DAG.getConstant(16, MVT::i32));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+ // Lower to a call to __misaligned_load(BasePtr).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, IntPtrTy, false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ Args, DAG, DL);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue XCoreTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const
+{
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+ assert(!ST->isTruncatingStore() && "Unexpected store type");
+ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
+ if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ return SDValue();
+ }
+ unsigned ABIAlignment = getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ // Leave aligned store alone.
+ if (ST->getAlignment() >= ABIAlignment) {
+ return SDValue();
+ }
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (ST->getAlignment() == 2) {
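+    // A 2-byte aligned i32 store can be done with two aligned truncating
+    // halfword stores: the low half at BasePtr and the high half
+    // (Value >> 16) at BasePtr + 2.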
+ SDValue Low = Value;
+ SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+ DAG.getConstant(16, MVT::i32));
+ SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+ ST->getPointerInfo(), MVT::i16,
+ ST->isVolatile(), ST->isNonTemporal(),
+ 2);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+ ST->getPointerInfo().getWithOffset(2),
+ MVT::i16, ST->isVolatile(),
+ ST->isNonTemporal(), 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+ }
+
+ // Lower to a call to __misaligned_store(BasePtr, Value).
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = BasePtr;
+ Args.push_back(Entry);
+
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
+ false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ Args, DAG, dl);
+
+ return CallResult.second;
+}
+
+SDValue XCoreTargetLowering::
+LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
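+  // maccs accumulates into the zero addends here, so the pair of results is
+  // just the 64-bit signed product: the first result is the high word, the
+  // second the low word.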
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Zero, Zero,
+ LHS, RHS);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue XCoreTargetLowering::
+LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
+{
+ assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI &&
+ "Unexpected operand to lower!");
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), LHS, RHS,
+ Zero, Zero);
+ SDValue Lo(Hi.getNode(), 1);
+ SDValue Ops[] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// isADDADDMUL - Return whether Op is in a form that is equivalent to
+/// add(add(mul(x,y),a),b). If requireIntermediatesHaveOneUse is true then
+/// each intermediate result in the calculation must also have a single use.
+/// If the Op is in the correct form the constituent parts are written to Mul0,
+/// Mul1, Addend0 and Addend1.
+static bool
+isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0,
+ SDValue &Addend1, bool requireIntermediatesHaveOneUse)
+{
+ if (Op.getOpcode() != ISD::ADD)
+ return false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue AddOp;
+ SDValue OtherOp;
+ if (N0.getOpcode() == ISD::ADD) {
+ AddOp = N0;
+ OtherOp = N1;
+ } else if (N1.getOpcode() == ISD::ADD) {
+ AddOp = N1;
+ OtherOp = N0;
+ } else {
+ return false;
+ }
+ if (requireIntermediatesHaveOneUse && !AddOp.hasOneUse())
+ return false;
+ if (OtherOp.getOpcode() == ISD::MUL) {
+ // add(add(a,b),mul(x,y))
+ if (requireIntermediatesHaveOneUse && !OtherOp.hasOneUse())
+ return false;
+ Mul0 = OtherOp.getOperand(0);
+ Mul1 = OtherOp.getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = AddOp.getOperand(1);
+ return true;
+ }
+ if (AddOp.getOperand(0).getOpcode() == ISD::MUL) {
+ // add(add(mul(x,y),a),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(0).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(0).getOperand(0);
+ Mul1 = AddOp.getOperand(0).getOperand(1);
+ Addend0 = AddOp.getOperand(1);
+ Addend1 = OtherOp;
+ return true;
+ }
+ if (AddOp.getOperand(1).getOpcode() == ISD::MUL) {
+ // add(add(a,mul(x,y)),b)
+ if (requireIntermediatesHaveOneUse && !AddOp.getOperand(1).hasOneUse())
+ return false;
+ Mul0 = AddOp.getOperand(1).getOperand(0);
+ Mul1 = AddOp.getOperand(1).getOperand(1);
+ Addend0 = AddOp.getOperand(0);
+ Addend1 = OtherOp;
+ return true;
+ }
+ return false;
+}
+
+SDValue XCoreTargetLowering::
+TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
+{
+ SDValue Mul;
+ SDValue Other;
+ if (N->getOperand(0).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(0);
+ Other = N->getOperand(1);
+ } else if (N->getOperand(1).getOpcode() == ISD::MUL) {
+ Mul = N->getOperand(1);
+ Other = N->getOperand(0);
+ } else {
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, RL, AddendL, AddendH;
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(0, MVT::i32));
+ RL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(0, MVT::i32));
+ AddendL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(0, MVT::i32));
+ AddendH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Other, DAG.getConstant(1, MVT::i32));
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+ unsigned LHSSB = DAG.ComputeNumSignBits(Mul.getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(Mul.getOperand(1));
+ if (DAG.MaskedValueIsZero(Mul.getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(Mul.getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ if (LHSSB > 32 && RHSSB > 32) {
+ // The inputs are both sign-extended.
+ SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ SDValue LH, RH;
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(0), DAG.getConstant(1, MVT::i32));
+ RH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul.getOperand(1), DAG.getConstant(1, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), AddendH,
+ AddendL, LL, RL);
+ SDValue Lo(Hi.getNode(), 1);
+ RH = DAG.getNode(ISD::MUL, dl, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, LH);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue Result = TryExpandADDWithMul(N, DAG);
+ if (Result.getNode() != 0)
+ return Result;
+ }
+
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Lo(Carry.getNode(), 1);
+
+ SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Hi(Ignored.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG) const
+{
+ llvm_unreachable("unimplemented");
+  // FIXME: Arguments passed by reference need an extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+SDValue XCoreTargetLowering::
+LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // .align 4
+ // LDAPF_u10 r11, nest
+ // LDW_2rus r11, r11[0]
+ // STWSP_ru6 r11, sp[0]
+ // LDAPF_u10 r11, fptr
+ // LDW_2rus r11, r11[0]
+ // BAU_1r r11
+ // nest:
+ // .word nest
+ // fptr:
+ // .word fptr
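+  //
+  // The first three words hold the instruction sequence above; the 'nest'
+  // value is stored at offset 12 and the function pointer at offset 16,
+  // matching the five stores emitted below.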
+ SDValue OutChains[5];
+
+ SDValue Addr = Trmp;
+
+ DebugLoc dl = Op.getDebugLoc();
+ OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(4, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 4), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(8, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 8), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(12, MVT::i32));
+ OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(16, MVT::i32));
+ OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 16), false, false,
+ 0);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore call implementation
+SDValue
+XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // XCore target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // For now, only CallingConv::C implemented
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Outs, OutVals, Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual
+/// regs to (physical regs)/(stack frame); CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue
+XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+  // Transform all the store nodes into a single TokenFactor node, since
+  // the stores are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore formal arguments implementation
+SDValue
+XCoreTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ switch (CallConv)
+ {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Chain, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: sret
+SDValue
+XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default:
+ {
+#ifndef NDEBUG
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset(),
+ true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ }
+
+ if (isVarArg) {
+ /* Argument registers */
+ static const unsigned ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
+ for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
+ // Create a stack slot
+ int FI = MFI->CreateFixedObject(4, offset, true);
+ if (i == FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+ // This will point to the next argument passed via stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true));
+ }
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
+}
+
+SDValue
+XCoreTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+  // CCValAssign - represents the assignment of
+  // the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+
+    // Guarantee that all emitted copies are stuck together,
+    // so the scheduler cannot reorder them.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on XCore is always a "retsp 0"
+ if (Flag.getNode())
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case XCoreISD::LADD: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
+
+ // fold (ladd 0, 0, x) -> 0, x & 1
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
+ DAG.getConstant(1, VT));
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+
+ // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
+ SDValue Ops [] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LSUB: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = N2;
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, VT), N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+
+ // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
+ // low bit set
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - 1);
+ DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
+ if (KnownZero == Mask) {
+ SDValue Borrow = DAG.getConstant(0, VT);
+ SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
+ SDValue Ops [] = { Borrow, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ }
+ break;
+ case XCoreISD::LMUL: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+    // Canonicalize the multiplicative constant to the RHS. If both
+    // multiplicative operands are constant, canonicalize the smaller to the RHS.
+ if ((N0C && !N1C) ||
+ (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3);
+
+ // lmul(x, 0, a, b)
+ if (N1C && N1C->isNullValue()) {
+ // If the high result is unused fold to add(a, b)
+ if (N->hasNUsesOfValue(0, 0)) {
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
+ SDValue Ops [] = { Lo, Lo };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ // Otherwise fold to ladd(a, b, 0)
+ return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ }
+ }
+ break;
+ case ISD::ADD: {
+    // Fold 32-bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b). The high result of lmul will be ignored.
+ // This is only profitable if the intermediate results are unused
+ // elsewhere.
+ SDValue Mul0, Mul1, Addend0, Addend1;
+ if (N->getValueType(0) == MVT::i32 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
+ SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0,
+ Mul1, Addend0, Addend1);
+ SDValue Result(Ignored.getNode(), 1);
+ return Result;
+ }
+ APInt HighMask = APInt::getHighBitsSet(64, 32);
+    // Fold 64-bit expressions such as add(add(mul(x,y),a),b) ->
+ // lmul(x, y, a, b) if all operands are zero-extended. We do this
+ // before type legalization as it is messy to match the operands after
+ // that.
+ if (N->getValueType(0) == MVT::i64 &&
+ isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, false) &&
+ DAG.MaskedValueIsZero(Mul0, HighMask) &&
+ DAG.MaskedValueIsZero(Mul1, HighMask) &&
+ DAG.MaskedValueIsZero(Addend0, HighMask) &&
+ DAG.MaskedValueIsZero(Addend1, HighMask)) {
+ SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul0, DAG.getConstant(0, MVT::i32));
+ SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Mul1, DAG.getConstant(0, MVT::i32));
+ SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend0, DAG.getConstant(0, MVT::i32));
+ SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Addend1, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
+ Addend0L, Addend1L);
+ SDValue Lo(Hi.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+ }
+ break;
+ case ISD::STORE: {
+ // Replace unaligned store of unaligned load with memmove.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!DCI.isBeforeLegalize() ||
+ allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ ST->isVolatile() || ST->isIndexed()) {
+ break;
+ }
+ SDValue Chain = ST->getChain();
+
+ unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
+ if (StoreBits % 8) {
+ break;
+ }
+ unsigned ABIAlignment = getTargetData()->getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
+ unsigned Alignment = ST->getAlignment();
+ if (Alignment >= ABIAlignment) {
+ break;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
+ if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
+ LD->getAlignment() == Alignment &&
+ !LD->isVolatile() && !LD->isIndexed() &&
+ Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
+ LD->getBasePtr(),
+ DAG.getConstant(StoreBits/8, MVT::i32),
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
+ }
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case XCoreISD::LADD:
+ case XCoreISD::LSUB:
+ if (Op.getResNo() == 0) {
+ // Top bits of carry / borrow are clear.
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ KnownZero &= Mask;
+ }
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
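+
+// For example, isImmUs4(44) holds (44 == 4 * 11, the largest scaled offset),
+// while isImmUs4(46) and isImmUs4(48) do not.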
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ if (Ty->getTypeID() == Type::VoidTyID)
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (AM.BaseGV) {
+ return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (Size) {
+ case 1:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ // reg + reg
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case 2:
+ case 3:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ // reg + reg<<1
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ default:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return std::make_pair(0U, XCore::GRRegsRegisterClass);
+ }
+ }
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 000000000000..9c803bef6dd2
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,201 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+  // Forward declarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB,
+
+ // Corresponds to LMUL instruction
+ LMUL,
+
+ // Corresponds to MACCU instruction
+ MACCU,
+
+ // Corresponds to MACCS instruction
+ MACCS,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+    /// getTargetNodeName - This method returns the name of a target-specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
+ SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
+ SelectionDAG &DAG) const;
+
+ // Lower Operand specifics
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
+ // Inline asm support
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Expand specifics
+ SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 000000000000..8002c993270c
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,120 @@
+//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 000000000000..f90481f3fbc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,398 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo()
+ : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot being stored
+/// to. If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the corresponding branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_TRUE into COND_FALSE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: llvm_unreachable("Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+///    a successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    branch, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
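// Illustrative sketch, not part of the imported file: how a generic CodeGen
// client (for example the branch folder) typically consumes the AnalyzeBranch
// contract documented above. The helper name invertBranchTo and the argument
// NewDest are invented for this example; the TargetInstrInfo calls follow the
// LLVM 2.9/3.0-era signatures declared for this backend.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Redirect a fall-through conditional branch (case 3 above) so that it jumps
// to what used to be the fall-through successor instead.
static bool invertBranchTo(MachineBasicBlock &MBB, MachineBasicBlock *NewDest,
                           const TargetInstrInfo &TII) {
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;                      // Target could not analyze this block.
  if (Cond.empty() || FBB)
    return false;                      // Not the "condbr + fall through" form.
  if (TII.ReverseBranchCondition(Cond))
    return false;                      // Condition cannot be reversed.
  TII.RemoveBranch(MBB);
  TII.InsertBranch(MBB, NewDest, 0, Cond, DebugLoc());
  return true;
}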
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
+ }
+
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
+ }
+
+ if (DestReg == XCore::SP && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
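// Illustrative sketch, not part of the imported file: the opcode selection
// made by copyPhysReg above, pulled out as a standalone helper. The function
// name copyOpcodeFor is invented here; the opcode and register enums are the
// ones generated for this backend into XCoreGenInstrInfo.inc and
// XCoreGenRegisterInfo.inc.
static unsigned copyOpcodeFor(unsigned DestReg, unsigned SrcReg) {
  bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
  bool GRSrc  = XCore::GRRegsRegClass.contains(SrcReg);
  if (GRDest && GRSrc)               return XCore::ADD_2rus;   // add dst, src, 0
  if (GRDest && SrcReg == XCore::SP) return XCore::LDAWSP_ru6; // ldaw dst, sp[0]
  if (DestReg == XCore::SP && GRSrc) return XCore::SETSP_1r;   // set sp, src
  return 0; // no single-instruction copy; copyPhysReg above asserts instead
}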
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+/// ReverseBranchCondition - Invert the branch condition held in Cond
+/// (COND_TRUE <-> COND_FALSE), returning false to indicate success.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 000000000000..840b1e163652
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,88 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
+namespace llvm {
+
+class XCoreInstrInfo : public XCoreGenInstrInfo {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 000000000000..55c7527f4e83
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,1210 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP, DP are not currently reflected in the patterns, since
+// having a physical register as an operand prevents loop hoisting and
+// since the value of these registers never changes during the life of the
+// function.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // look for the first non-zero bit
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def bpwsub_xform : SDNodeXForm<imm, [{
+ // Transformation function: 32-imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(32-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
+
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def immBpwSubBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 24 && value <= 31)
+ || value == 16
+ || value == 8
+ || value == 0;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
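// Illustrative sketch, not part of the imported file: the immediate ranges and
// address arithmetic encoded by the PatLeaf/PatFrag definitions above, restated
// as plain C++ so the scaled-offset forms are easy to check. The function names
// below are invented for this example.
#include <stdint.h>

static bool isImmUs(uint32_t v)   { return v <= 11; }                   // short-form immediate
static bool isImmUs4(uint32_t v)  { return v % 4 == 0 && v / 4 <= 11; } // 0, 4, ..., 44
static bool isImmBitp(uint32_t v) {                                     // valid bit positions
  return (v >= 1 && v <= 8) || v == 16 || v == 24 || v == 32;
}

// Scaled address fragments: lda16 uses half-word offsets, ldaw uses word offsets.
static uint32_t lda16f(uint32_t addr, uint32_t off) { return addr + (off << 1); }
static uint32_t lda16b(uint32_t addr, uint32_t off) { return addr - (off << 1); }
static uint32_t ldawf (uint32_t addr, uint32_t off) { return addr + (off << 2); }
static uint32_t ldawb (uint32_t addr, uint32_t off) { return addr - (off << 2); }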
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<string OpcStr> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RBITP multiclass - Define a normal FL3R/FL2RUS pattern in one shot,
+/// with a bit-position immediate in the short form.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU16:$b)]>;
+}
+multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+ def _u10: _FU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu10: _FLU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+ (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"),
+ []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "${:comment} STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "${:comment} SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
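// Illustrative sketch, not part of the imported file: the value SELECT_CC is
// meant to produce. Per the comment above it is expanded into a branch
// sequence by the custom inserter after instruction selection; this helper
// (the name select_cc is invented here) only restates the intended semantics.
static unsigned select_cc(unsigned cond, unsigned trueVal, unsigned falseVal) {
  return cond != 0 ? trueVal : falseVal; // brf on cond selects falseVal, otherwise trueVal
}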
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<"add", add>;
+defm SUB : F3R_2RUS<"sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<"eq">;
+def LSS_3r : F3R_np<"lss">;
+def LSU_3r : F3R_np<"lsu">;
+}
+def AND_3r : F3R<"and", and>;
+def OR_3r : F3R<"or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]",
+ []>;
+
+def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]",
+ []>;
+}
+
+let mayStore=1 in {
+def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+
+def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+}
+
+defm SHL : F3R_2RBITP<"shl", shl>;
+defm SHR : F3R_2RBITP<"shr", srl>;
+// TODO tsetr
+
+// Three operand long
+def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]",
+ []>;
+
+def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ []>;
+
+def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<"mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<"divs", sdiv>;
+def DIVU_l3r : FL3R<"divu", udiv>;
+def REMS_l3r : FL3R<"rems", srem>;
+def REMU_l3r : FL3R<"remu", urem>;
+}
+def XOR_l3r : FL3R<"xor", xor>;
+defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+
+let Constraints = "$src1 = $dst" in
+def CRC_l3r : _FL3R<(outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
+
+// TODO inpw, outpw
+let mayStore=1 in {
+def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]",
+ []>;
+
+def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]",
+ []>;
+}
+
+// Four operand long
+let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccu $dst1, $dst2, $src3, $src4",
+ []>;
+
+def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccs $dst1, $dst2, $src3, $src4",
+ []>;
+}
+
+let Constraints = "$src1 = $dst1" in
+def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc8 $dst1, $dst2, $src2, $src3",
+ []>;
+
+// Five operand long
+
+def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldiv $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+// Six operand long
+
+def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ [(set GRRegs:$dst, ADDRdpii:$a)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ []>;
+
+def LDWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ []>;
+
+def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ [(store GRRegs:$val, ADDRdpii:$addr)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1 in
+defm LDWCP : FRU6_LRU6_cp<"ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
+
+def STWSP_lru6 : _FLRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+
+def LDWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_ru6_RRegs : _FRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6_RRegs : _FLRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU16:$b)]>;
+}
+
+def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+
+def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+
+// Operand register - U6
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<"bt">;
+defm BRBT: FRU6_LRU6_branch<"bt">;
+defm BRFF: FRU6_LRU6_branch<"bf">;
+defm BRBF: FRU6_LRU6_branch<"bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<"extsp">;
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<"entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
+defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+}
+}
+
+// TODO extdp, kentsp, krestsp, blat
+// getsr, kalli
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def BRBU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRBU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+}
+
+//let Uses = [CP] in ...
+let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLRU6<
+ (outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
+defm SETSR_branch : FU6_LU6_np<"setsr">;
+defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+}
+
+// U10
+// TODO ldwcpl, blacp
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAP_u10 : _FU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10 : _FLU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10_ba : _FLU10<(outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BL_u10 : _FU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU10:$target)]>;
+
+def BL_lu10 : _FLU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU20:$target)]>;
+}
+
+// Two operand short
+// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
+def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b",
+ [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b",
+ [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+let Constraints = "$src1 = $dst" in {
+let neverHasSideEffects = 1 in
+def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ []>;
+
+def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size",
+ []>;
+
+def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+
+def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+let Constraints = "$src = $dst" in
+def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+
+def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "in $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+let Constraints = "$src = $dst" in
+def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+
+def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
+def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getst $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:sp, $src",
+ [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:pc, $src",
+ [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:cp, $src",
+ [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:dp, $src",
+ [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
+// Two operand long
+// TODO endin, peek,
+// getd, testlcl
+def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
+def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
+def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:lr, $src",
+ [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+// One operand short
+// TODO edu, eeu, waitet, waitef, tstart, clrtp
+// setdp, setcp, setev, kcall
+// dgetreg
+def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
+ "msync res[$i]",
+ [(int_xcore_msync GRRegs:$i)]>;
+def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
+ "mjoin res[$i]",
+ [(int_xcore_mjoin GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
+ "bau $addr",
+ [(brind GRRegs:$addr)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
+ "set sp, $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallt $src",
+ []>;
+
+let hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallf $src",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+ "bla $addr",
+ [(XCoreBranchLink GRRegs:$addr)]>;
+}
+
+def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
+ "syncr res[$r]",
+ [(int_xcore_syncr GRRegs:$r)]>;
+
+def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
+ "freer res[$r]",
+ [(int_xcore_freer GRRegs:$r)]>;
+
+let Uses=[R11] in
+def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
+ "eeu res[$r]",
+ [(int_xcore_eeu GRRegs:$r)]>;
+
+// Zero operand short
+// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// dentsp, drestsp
+
+def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+
+let Defs = [R11] in
+def GETID_0R : _F0R<(outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def SSYNC_0r : _F0R<(outs), (ins),
+ "ssync",
+ [(int_xcore_ssync)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ hasSideEffects = 1 in
+def WAITEU_0R : _F0R<(outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
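// Illustrative sketch, not part of the imported file: the arithmetic behind the
// three "multiply by a small constant" peepholes above. lda16 computes
// addr + 2*offset and ldaw computes addr +/- 4*offset, so feeding the same
// register to both operands yields 3*x, 5*x and -3*x. Function names are
// invented for this example.
#include <stdint.h>

static uint32_t mulBy3(uint32_t x)      { return x + (x << 1); } // lda16 d, x[x]
static uint32_t mulBy5(uint32_t x)      { return x + (x << 2); } // ldaw  d, x[x]
static uint32_t mulByMinus3(uint32_t x) { return x - (x << 2); } // ldaw  d, x[-x]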
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
+def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+// setge X, 0 is canonicalized to setgt X, -1
+def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst),
+ (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>;
+
+def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(setgt GRRegs:$lhs, -1),
+ (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>;
+
+def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm),
+ (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 000000000000..a575a0f69541
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ explicit XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 000000000000..357a4a083582
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,340 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(), TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static const unsigned XCore_ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+};
+
+const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
+{
+ return XCore_ArgRegs;
+}
+
+unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
+{
+ return array_lengthof(XCore_ArgRegs);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
+ return MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+}
+
+const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (TFI->hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // TODO can we estimate stack size?
+ return TFI->hasFP(MF);
+}
+
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return false;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
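// Illustrative sketch, not part of the imported file: the rounding and word
// scaling applied above when ADJCALLSTACKDOWN/ADJCALLSTACKUP are lowered. The
// helper name callFrameWords and the worked example are invented here.
#include <assert.h>
#include <stdint.h>

static uint64_t callFrameWords(uint64_t AmountBytes, unsigned Align) {
  AmountBytes = (AmountBytes + Align - 1) / Align * Align; // round up to the stack alignment
  assert(AmountBytes % 4 == 0);
  return AmountBytes / 4;            // extsp <amt> and ldaw sp, sp[<amt>] take word counts
}
// For example, with a 4-byte alignment a 10-byte outgoing-argument area rounds
// up to 12 bytes, i.e. 3 words, which fits the short u6 encodings
// (EXTSP_u6 / LDAWSP_ru6_RRegs) chosen above.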
+
+void
+XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &FrameOp = MI.getOperand(i);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DEBUG(errs() << "\nFunction : "
+ << MF.getFunction()->getName() << "\n");
+ DEBUG(errs() << "<--------->\n");
+ DEBUG(MI.print(errs()));
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");
+ DEBUG(errs() << "FrameOffset : " << Offset << "\n");
+ DEBUG(errs() << "StackSize : " << StackSize << "\n");
+ #endif
+
+ Offset += StackSize;
+
+ unsigned FrameReg = getFrameRegister(MF);
+
+ // Special handling of DBG_VALUE instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // fold constant into offset.
+ Offset += MI.getOperand(i + 1).getImm();
+ MI.getOperand(i + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ Offset/=4;
+
+ bool FP = TFI->hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+ "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+
+ if (!isUs) {
+ if (!RS)
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+ unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("eliminateFrameIndex Frame size too big: " +
+ Twine(Offset));
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
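// Illustrative sketch, not part of the imported file: how the word offset used
// above is derived in the SP-relative (no frame pointer) case. The helper name
// frameWordOffset is invented here; the arithmetic mirrors the code above.
#include <assert.h>

static int frameWordOffset(int ObjectOffset, int StackSize, int FoldedImm) {
  int Offset = ObjectOffset + StackSize + FoldedImm; // frame object + stack size + folded immediate
  assert(Offset % 4 == 0 && "Misaligned stack offset");
  return Offset / 4; // ldw/stw/ldaw sp[...] encode word offsets from sp
}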
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ report_fatal_error("loadConstant value too big " + Twine(Value));
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
+}
+
+unsigned XCoreRegisterInfo::getRARegister() const {
+ return XCore::LR;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 000000000000..801d9eba2171
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,85 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getArgRegsSize().
+ */
+ static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 000000000000..c3542304a4ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,56 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ (add R0, R1, R2, R3,
+ // Not preserved across procedure calls
+ R11,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10)>;
+
+// Reserved
+def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+ let isAllocatable = 0;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..44aeb6057ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-selectiondag-info"
+#include "XCoreTargetMachine.h"
+using namespace llvm;
+
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
new file mode 100644
index 000000000000..0386968638bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCore subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESELECTIONDAGINFO_H
+#define XCORESELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine;
+
+class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ ~XCoreSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 000000000000..ad069bf138a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : XCoreGenSubtargetInfo(TT, CPU, FS)
+{
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 000000000000..7b29fa236710
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,42 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 000000000000..342966ae5c86
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,44 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
+ DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
+ InstrInfo(),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this) {
+}
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createXCoreISelDag(*this));
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() {
+ RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 000000000000..6235ac3a6a1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,62 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
+
+namespace llvm {
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
+public:
+ XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const XCoreTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
new file mode 100644
index 000000000000..7f4e1c1b4fd7
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -0,0 +1,65 @@
+//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetObjectFile.h"
+#include "XCoreSubtarget.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+
+void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ DataSection =
+ Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel());
+ BSSSection =
+ Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS());
+
+ MergeableConst4Section =
+ Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4());
+ MergeableConst8Section =
+ Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8());
+ MergeableConst16Section =
+ Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16());
+
+ // TLS globals are lowered in the backend to arrays indexed by the current
+ // thread id. After lowering they require no special handling by the linker
+ // and can be placed in the standard data / bss sections.
+ TLSDataSection = DataSection;
+ TLSBSSSection = BSSSection;
+
+ ReadOnlySection =
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel());
+
+ // Dynamic linking is not supported. Data with relocations is placed in the
+ // same section as data without relocations.
+ DataRelSection = DataRelLocalSection = DataSection;
+ DataRelROSection = DataRelROLocalSection = ReadOnlySection;
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
new file mode 100644
index 000000000000..7424c78be305
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -0,0 +1,25 @@
+//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ // TODO: Classify globals as xcore wishes.
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 000000000000..fa007cfc6513
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,906 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but this is not currently done. This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
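+//
+// As an illustrative sketch (editor's note, not part of this commit), the
+// transformation on a made-up internal function looks roughly like this in
+// C-like terms:
+//
+//   // Before: 'p' is only ever loaded.
+//   static int get(const int *p) { return *p; }
+//   ... get(&x) ...
+//
+//   // After: the load is performed in the caller and the value is passed
+//   // directly, which often lets later passes eliminate the alloca for 'x'.
+//   static int get(int p_val) { return p_val; }
+//   ... get(x) ...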
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct ArgPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ LocalChange = true;
+ SCC.ReplaceNode(*I, CGN);
+ }
+ }
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return 0;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (I->getType()->isPointerTy())
+ PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ if (PointerArgs.empty()) return 0;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ // Must be a direct call.
+ if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+ bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ Argument *PtrArg = PointerArgs[i].first;
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe.
+ if (isByVal) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+ }
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (STy->getElementType(i) == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, isByVal))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return 0;
+
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callers pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+
+ unsigned ArgNo = std::distance(Callee->arg_begin(),
+ Function::arg_iterator(Arg));
+
+ // Look at all call sites of the function. At this point we know we only
+ // have direct call sites.
+ for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
+
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That means Longer has a size
+/// that is greater than or equal to the size of Prefix, and each of the
+/// elements in Prefix is the same as the corresponding element in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
+ if (Prefix[i] != Longer[i])
+ return false;
+ return true;
+}
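+// For example (editor's illustration, not in the original source):
+//   IsPrefix({1,2}, {1,2,3}) == true   // proper prefix
+//   IsPrefix({1,2}, {1,2})   == true   // equal vectors count as a prefix
+//   IsPrefix({1,3}, {1,2,3}) == false  // element mismatch
+//   IsPrefix({1,2,3}, {1,2}) == false  // Prefix is longer than Longer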
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are implicitly safe because of ToMark now).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This
+ // means it points to a prefix of Indices (possibly Indices itself), if
+ // such prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+ // If there is already a prefix of these indices (or exactly these
+ // indices) marked as safe, don't bother adding them again.
+ return;
+
+ // Increment Low, so we can use it as an "insert before" hint
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+ // If ToMark is a prefix of longer index list(s), remove those
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
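+// Editor's illustration (not in the original source) of how Safe evolves for
+// some made-up index lists:
+//   Safe = { {1,1}, {2,3,4} }
+//   MarkIndicesSafe({1}, Safe)     -> Safe = { {1}, {2,3,4} }  // {1,1} is now
+//                                                              // covered by {1}
+//   MarkIndicesSafe({2,3}, Safe)   -> Safe = { {1}, {2,3} }    // {2,3,4} removed
+//   MarkIndicesSafe({2,3,5}, Safe) -> Safe unchanged           // prefix {2,3}
+//                                                              // already safe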
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+ // would have happened in the callee anyway (ie, there is a load in the entry
+ // block) or the pointer passed in at every call site is guaranteed to be
+ // valid.
+ // In the former case, invalid loads can happen, but would have happened
+ // anyway, in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
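+ //
+ // As an editor's illustration (not from the original source), consider a
+ // made-up callee in C-like form:
+ //
+ //   static int f(int *p, bool c) { return c ? *p : 0; }
+ //
+ // The load of *p is conditional, so hoisting it into every caller is only
+ // safe when each call site is known to pass a dereferenceable pointer. If
+ // the entry block instead loaded *p unconditionally, promotion could not
+ // introduce a fault that the original code would not already have hit.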
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock *EntryBlock = Arg->getParent()->begin();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+ I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->use_empty()) {
+ // Dead GEP's cause trouble later. Just remove them if we run into
+ // them.
+ getAnalysis<AliasAnalysis>().deleteValue(GEP);
+ GEP->eraseFromParent();
+ // TODO: This runs the above loop over and over again for dead GEPs
+ // Couldn't we just increment the UI iterator earlier and erase the
+ // use?
+ return isSafeToPromoteArgument(Arg, isByVal);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+ // See if we are already promoting a load with these indices. If not, check
+ // to make sure that we aren't promoting too many elements. If so, nothing
+ // to do.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(Operands);
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ AliasAnalysis::Location Loc = AA.getLocation(Load);
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+ I = idf_ext_begin(P, TranspBlocks),
+ E = idf_ext_end(P, TranspBlocks); I != E; ++I)
+ if (AA.canBasicBlockModify(**I, Loc))
+ return false;
+ }
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ typedef std::set<IndicesVector> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+ // Attributes - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(I)) {
+ // Simple byval argument? Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Params.push_back(STy->getElementType(i));
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ IndicesVector Indices;
+ Indices.reserve(User->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(Indices);
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(User))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(User->use_back());
+ OriginalLoads[Indices] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
+ SI->begin(),
+ SI->end()));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ const Type *RetTy = FTy->getReturnType();
+
+ // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+ // have zero fixed arguments.
+ bool ExtraArgHack = false;
+ if (Params.empty() && FTy->isVarArg()) {
+ ExtraArgHack = true;
+ Params.push_back(Type::getInt32Ty(F->getContext()));
+ }
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Get the alias analysis information that we need to update to reflect our
+ // changes.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = CallPAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+
+ } else if (ByValArgsToTransform.count(I)) {
+ // Emit a GEP and load for each element of the struct.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ (*AI)->getName()+"."+utostr(i),
+ Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[*SI];
+ if (!SI->empty()) {
+ Ops.reserve(SI->size());
+ const Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->begin(),
+ IE = SI->end(); II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ const Type *IdxTy = (ElTy->isStructTy() ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing.
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
+ V->getName()+".idx", Call);
+ Ops.clear();
+ AA.copyValue(OrigLoad->getOperand(0), V);
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the TBAA info too.
+ newLoad->setMetadata(LLVMContext::MD_tbaa,
+ OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+ Args.push_back(newLoad);
+ AA.copyValue(OrigLoad, Args.back());
+ }
+ }
+
+ if (ExtraArgHack)
+ Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
+
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = CallPAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the alias analysis implementation to know that we are replacing
+ // the old call with a new one.
+ AA.replaceWithNewValue(Call, New);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, and transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ AA.replaceWithNewValue(I, I2);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = NF->begin()->begin();
+
+ // Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx =
+ GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ TheAlloca->getName()+"."+Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
+ new StoreInst(I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(I);
+ AA.replaceWithNewValue(I, TheAlloca);
+ continue;
+ }
+
+ if (I->use_empty()) {
+ AA.deleteValue(I);
+ continue;
+ }
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ assert(ArgIndices.begin()->empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(I2);
+ AA.replaceWithNewValue(LI, I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ *It != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->use_back());
+ L->replaceAllUsesWith(TheArg);
+ AA.replaceWithNewValue(L, TheArg);
+ L->eraseFromParent();
+ }
+ AA.deleteValue(GEP);
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
+ ++I2;
+ }
+
+ // Notify the alias analysis implementation that we inserted a new argument.
+ if (ExtraArgHack)
+ AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
+ NF->arg_begin());
+
+
+ // Tell the alias analysis that the old function is about to disappear.
+ AA.replaceWithNewValue(F, NF);
+
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
+ // Now that the old function is dead, delete it. If there is a dangling
+ // reference to the CallgraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
+
+ return NF_CGN;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 000000000000..a21efced73b9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,190 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (e.g. TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge is designed to build up a map of available constants
+// and eliminate duplicates when it is initialized.
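+//
+// As an illustrative sketch (editor's note, not part of this commit), two
+// internal constants with identical initializers such as
+//
+//   @a = internal unnamed_addr constant i32 42
+//   @b = internal unnamed_addr constant i32 42
+//
+// are collapsed into a single canonical global: every use of the duplicate is
+// rewritten to refer to the surviving copy and the duplicate is erased.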
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constmerge"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ // run - For this pass, process all of the globals in the module,
+ // eliminating duplicate constants.
+ //
+ bool runOnModule(Module &M);
+ };
+}
+
+char ConstantMerge::ID = 0;
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false)
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+ // Map unique constant/section pairs to globals. We don't want to merge
+ // globals in different sections.
+ DenseMap<Constant*, GlobalVariable*> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
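+ // For instance (editor's illustration, not in the original source), if @p
+ // and @q are identical except that their initializers point at @a and @b
+ // respectively, they only become identical (and thus mergeable) once @b has
+ // been folded into @a; that is why this loop repeats until a pass makes no
+ // replacements.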
+ while (1) {
+
+ // First: Find the canonical constants others will be merged with.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[Init];
+
+      // If this is the first constant we find, or if the old one is local,
+      // replace it with the current one. If the current one is externally
+      // visible it cannot be replaced, but it can be the canonical constant
+      // we merge with.
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
+ Slot = GV;
+ }
+ }
+
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+      // We can only replace constants with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *Slot = CMap[Init];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them
+    // all now. This avoids invalidating the pointers in CMap, which are no
+    // longer needed.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Eliminate any uses of the dead global.
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Replacements[i].first->eraseFromParent();
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 000000000000..15177650f4e5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,1003 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
+//
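+// For illustration only (hypothetical IR, not from the original sources): for
+// an internal function
+//
+//   define internal i32 @f(i32 %used, i32 %unused) { ... }
+//
+// in which %unused has no uses, the pass rebuilds @f without the dead
+// parameter,
+//
+//   define internal i32 @f(i32 %used) { ... }
+//
+// and rewrites every call site to stop passing the corresponding argument.
+//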
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadargelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+ /// interchangeably.
+ struct RetOrArg {
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ if (F != O.F)
+ return F < O.F;
+ else if (Idx != O.Idx)
+ return Idx < O.Idx;
+ else
+ return IsArg < O.IsArg;
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ + utostr(Idx) + " of function " + F->getNameStr();
+ }
+ };
+
+ /// Liveness enum - During our initial pass over the program, we determine
+ /// that things are either alive or maybe alive. We don't mark anything
+ /// explicitly dead (even if we know they are), since anything not alive
+ /// with no registered uses (in Uses) will never be marked alive and will
+ /// thus become dead in the end.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+ /// arguments it uses. This allows the MaybeLive values to be marked live
+ /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = 0);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+ void SurveyFunction(const Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ DAH() : DAE(ID) {}
+
+ virtual bool ShouldHackArguments() const { return true; }
+ };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ if (Fn.hasAddressTaken())
+ return false;
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ const FunctionType *FTy = Fn.getFunctionType();
+
+ std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!Fn.use_empty()) {
+ CallSite CS(Fn.use_back());
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttrListPtr PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlot(i));
+ if (Attributes FnAttrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ }
+
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and if so, replaces the corresponding arguments
+/// at each call site with undef.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+ if (Fn.isDeclaration() || Fn.mayBeOverridden())
+ return false;
+
+ // Functions with local linkage should already have been handled.
+ if (Fn.hasLocalLinkage())
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ llvm::SmallVector<unsigned, 8> UnusedArgs;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
+ Argument *Arg = I;
+
+ if (Arg->use_empty() && !Arg->hasByValAttr())
+ UnusedArgs.push_back(Arg->getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
+ I != E; ++I) {
+ CallSite CS(*I);
+ if (!CS || !CS.isCallee(I))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns 0
+/// for void functions and 1 for functions not returning a struct. It returns
+/// the number of struct elements for functions returning a struct.
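+/// For example (illustrative), a function returning {i32, i8} yields 2, while
+/// a void function yields 0.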
+static unsigned NumRetVals(const Function *F) {
+ if (F->getReturnType()->isVoidTy())
+ return 0;
+ else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ return STy->getNumElements();
+ else
+ return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. It is only adjusted during recursion; callers should
+/// always leave it at 0.
+DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = *U;
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (Value::const_use_iterator I = IV->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+
+ if (ImmutableCallSite CS = V) {
+ const Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+      // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the called function this would be an
+      // indirect call, and we know we cannot be looking at a value of label
+      // type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U);
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U.getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value and returns the
+/// Liveness deduced from those uses.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (Value::const_use_iterator I = V->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking out whether or not it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ const Type *STy = dyn_cast<StructType>(F.getReturnType());
+ // Loop all uses of the function.
+ for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
+ I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ ImmutableCallSite CS(*I);
+ if (!CS || !CS.isCallee(I)) {
+ MarkLive(F);
+ return;
+ }
+
+ // If this use is anything other than a call site, the function is alive.
+ const Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) { // Not a direct call site?
+ MarkLive(F);
+ return;
+ }
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals != RetCount) {
+ if (STy) {
+ // Check all uses of the return value.
+ for (Value::const_use_iterator I = TheCall->use_begin(),
+ E = TheCall->use_end(); I != E; ++I) {
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ if (Ext && Ext->hasIndices()) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+ // Used by something else than extractvalue. Mark all return
+ // values as live.
+ for (unsigned i = 0; i != RetCount; ++i )
+ RetValLiveness[i] = Live;
+ NumLiveRetVals = RetCount;
+ break;
+ }
+ }
+ } else {
+ // Single return value
+ RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
+ if (RetValLiveness[0] == Live)
+ NumLiveRetVals = RetCount;
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::const_arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally,
+/// mark any values that are used as this function's parameters or by its return
+/// values (according to Uses) live as well.
+void DAE::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of its call
+// sites so that they no longer have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // The existing function return attributes.
+ Attributes RAttrs = PAL.getRetAttributes();
+ Attributes FnAttrs = PAL.getFnAttributes();
+
+ // Find out the new return value.
+
+ Type *RetTy = FTy->getReturnType();
+ const Type *NRetTy = NULL;
+ unsigned RetCount = NumRetVals(F);
+
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<Type*> RetTypes;
+ if (RetTy->isVoidTy()) {
+ NRetTy = RetTy;
+ } else {
+ const StructType *STy = dyn_cast<StructType>(RetTy);
+ if (STy)
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(STy->getElementType(i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ else
+ // We used to return a single value.
+ if (LiveValues.erase(CreateRet(F, 0))) {
+ RetTypes.push_back(RetTy);
+ NewRetIdxs[0] = 0;
+ } else {
+ DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
+ << "\n");
+ ++NumRetValsEliminated;
+ }
+ if (RetTypes.size() > 1)
+ // More than one return type? Return a struct with them. Also, if we used
+ // to return a struct and didn't change the number of return values,
+ // return a struct again. This prevents changing {something} into
+ // something and {} into void.
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::getVoidTy(F->getContext());
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy->isVoidTy())
+ RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ else
+ assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ && "Return attributes no longer compatible?");
+
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+      // Get the original parameter attributes (skipping the first one, that is
+      // for the return value).
+ if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ Attributes RAttrs = CallPAL.getRetAttributes();
+ Attributes FnAttrs = CallPAL.getFnAttributes();
+ // Adjust in case the function was changed to return void.
+ RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+    // Declare these outside of the loops, so we can reuse them for the second
+    // loop, which loops over the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType()->isVoidTy()) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert(RetTy->isStructTy() &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock::iterator IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ InsertPt = IP;
+ }
+
+ // We used to return a struct. Instead of doing smart stuff with all the
+ // uses of this struct, we will just rebuild it using
+ // extract/insertvalue chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType()->isVoidTy()) {
+ RetVal = 0;
+ } else {
+ assert (RetTy->isStructTy());
+ // The original return value was a struct, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+        // This does generate messy code, but we'll leave it to instcombine to
+        // clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(F->getContext(), RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+  // Second phase: loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ SurveyFunction(*I);
+
+ // Now, remove all dead arguments and return values from each function in
+ // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.
+ // replaced by a new one).
+ Function *F = I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function& F = *I;
+
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 000000000000..d9911bfb4597
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,92 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values
+//
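+// With deleteStuff set, the named values themselves lose their bodies and
+// initializers; otherwise everything except the named values does. In both
+// cases the surviving globals are given external linkage, as explained in
+// the comment inside runOnModule below.
+//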
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Constants.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class GVExtractorPass : public ModulePass {
+ SetVector<GlobalValue *> Named;
+ bool deleteStuff;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the specified
+    /// global values. Otherwise, it deletes as much of the module as possible,
+    /// except for the values specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+ bool runOnModule(Module &M) {
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+ I->setInitializer(0);
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->getName() == "llvm.global_ctors")
+ continue;
+ }
+
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+ I->deleteBody();
+ } else {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ }
+
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 000000000000..95decec0f874
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,380 @@
+//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. In addition,
+// it marks function arguments (of pointer type) 'nocapture' if a call
+// to the function does not create any copies of the pointer value that
+// outlive the call. This more or less means that the pointer is only
+// dereferenced, and not returned from the function or stored in a global.
+// This pass is implemented as a bottom-up traversal of the call-graph.
+//
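+// For illustration only (hypothetical IR, not from the original sources):
+//
+//   define internal i32 @get(i32* %p) {
+//     %v = load i32* %p
+//     ret i32 %v
+//   }
+//
+// only reads memory through %p, so it can be marked 'readonly', and %p never
+// escapes the call, so it can also be marked 'nocapture'.
+//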
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "functionattrs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/InstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+
+namespace {
+ struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+ bool AddReadAttrs(const CallGraphSCC &SCC);
+
+ // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+ bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
+
+ // IsFunctionMallocLike - Does this function allocate new memory?
+ bool IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &) const;
+
+ // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+ bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ AliasAnalysis *AA;
+ };
+}
+
+char FunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - may write memory. Just give up.
+ return false;
+
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+ if (MRB == AliasAnalysis::DoesNotAccessMemory)
+ // Already perfect!
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden()) {
+ if (!AliasAnalysis::onlyReadsMemory(MRB))
+ // May write memory. Just give up.
+ return false;
+
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+ // If the call doesn't access arbitrary memory, we may be able to
+ // figure out something.
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // If the call does access argument pointees, check each argument.
+ if (AliasAnalysis::doesAccessArgPointees(MRB))
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (Arg->getType()->isPointerTy()) {
+ AliasAnalysis::Location Loc(Arg,
+ AliasAnalysis::UnknownSize,
+ I->getMetadata(LLVMContext::MD_tbaa));
+ if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+ if (MRB & AliasAnalysis::Mod)
+ // Writes non-local memory. Give up.
+ return false;
+ if (MRB & AliasAnalysis::Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ }
+ }
+ continue;
+ }
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & AliasAnalysis::Mod)
+ return false;
+ // If it reads, note it.
+ if (MRB & AliasAnalysis::Ref)
+ ReadsMemory = true;
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory.
+ if (!LI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory.
+ if (!SI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(SI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ AliasAnalysis::Location Loc = AA->getLocation(VI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return false;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+
+ // Add in the new attribute.
+ F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
+ bool Changed = false;
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+      // External node - skip it.
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
+ !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// IsFunctionMallocLike - A function is malloc-like if it returns either null
+/// or a pointer that doesn't alias any other pointer visible to the caller.
+bool FunctionAttrs::IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &SCCNodes) const {
+ UniqueVector<Value *> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved.
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() &&
+ SCCNodes.count(CS.getCalledFunction()))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+      // External node - we cannot prove anything about it, so give up.
+ return false;
+
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ if (!IsFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool Changed = AddReadAttrs(SCC);
+ Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddNoAliasAttrs(SCC);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 000000000000..2b427aa6a4e6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,211 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
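+// For illustration only (hypothetical IR, not from the original sources): an
+// internal function that is never referenced, such as
+//
+//   define internal void @helper() { ret void }
+//
+// is never marked alive by the liveness walk below and is therefore removed,
+// together with anything that only it referenced.
+//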
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globaldce"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+    // run - Do the GlobalDCE pass on the specified module.
+ //
+ bool runOnModule(Module &M);
+
+ private:
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+
+ /// GlobalIsNeeded - mark the specific global value as needed, and
+ /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+char GlobalDCE::ID = 0;
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false)
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
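+
+// A minimal usage sketch (ours, for orientation; the wrapper name and the use
+// of the legacy PassManager are assumptions about how a client of this tree
+// would typically drive the pass):
+//
+//   #include "llvm/PassManager.h"
+//   #include "llvm/Transforms/IPO.h"
+//
+//   // Remove dead internal globals from M; returns true if anything changed.
+//   bool stripDeadGlobals(llvm::Module &M) {
+//     llvm::PassManager PM;
+//     PM.add(llvm::createGlobalDCEPass());
+//     return PM.run(M);
+//   }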
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Functions with external linkage are needed if they have a body
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible & appending globals are needed, if they have an
+ // initializer.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible aliases are needed.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadGlobalVars.push_back(I); // Keep track of dead globals
+ I->setInitializer(0);
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function*> DeadFunctions;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadFunctions.push_back(I); // Keep track of dead globals
+ if (!I->isDeclaration())
+ I->deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
+ ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadAliases.push_back(I);
+ I->setAliasee(0);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all inter-references have been dropped, delete the actual objects
+ // themselves.
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadFunctions[i]);
+ M.getFunctionList().erase(DeadFunctions[i]);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
+ M.getGlobalList().erase(DeadGlobalVars[i]);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadAliases[i]);
+ M.getAliasList().erase(DeadAliases[i]);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+
+ return Changed;
+}
+
+/// GlobalIsNeeded - mark the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ // If the global is already in the set, no need to reprocess it.
+ if (!AliveGlobals.insert(G))
+ return;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(*U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ if (Constant *OpC = dyn_cast<Constant>(*I))
+ MarkUsedGlobalsAsNeeded(OpC);
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty()) return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 000000000000..4ac721dd0600
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,2856 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. Where it is obviously safe, it marks globals as constant, deletes
+// variables that are only stored to, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalopt"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumFnDeleted , "Number of functions deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+
+namespace {
+ struct GlobalStatus;
+ struct GlobalOpt : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+ private:
+ GlobalVariable *FindGlobalCtors(Module &M);
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
+ bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS);
+ bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+ };
+}
+
+char GlobalOpt::ID = 0;
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
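+
+// For orientation only (hand-written example, not taken from a test in this
+// patch): given IR such as
+//
+//   @only_read    = internal global i32 42   ; loaded, never stored to
+//   @only_written = internal global i32 0    ; stored to, never loaded
+//
+// and assuming neither global has its address taken, this pass can rewrite
+// @only_read into 'internal constant i32 42' and delete @only_written
+// together with the stores into it.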
+
+namespace {
+
+/// GlobalStatus - As we analyze each global, keep track of some information
+/// about it. If we find out that the address of the global is taken, none of
+/// this info will be accurate.
+struct GlobalStatus {
+ /// isCompared - True if the global's address is used in a comparison.
+ bool isCompared;
+
+ /// isLoaded - True if the global is ever loaded. If the global isn't ever
+ /// loaded it can be deleted.
+ bool isLoaded;
+
+ /// StoredType - Keep track of what stores to the global look like.
+ ///
+ enum StoredType {
+ /// NotStored - There is no store to this global. It can thus be marked
+ /// constant.
+ NotStored,
+
+ /// isInitializerStored - This global is stored to, but the only thing
+ /// stored is the constant it was initialized with. This is only tracked
+ /// for scalar globals.
+ isInitializerStored,
+
+ /// isStoredOnce - This global is stored to, but only its initializer and
+ /// one other value is ever stored to it. If this global isStoredOnce, we
+ /// track the value stored to it in StoredOnceValue below. This is only
+ /// tracked for scalar globals.
+ isStoredOnce,
+
+ /// isStored - This global is stored to by multiple values or something else
+ /// that we cannot track.
+ isStored
+ } StoredType;
+
+ /// StoredOnceValue - If only one value (besides the initializer constant) is
+ /// ever stored to this global, keep track of what value it is.
+ Value *StoredOnceValue;
+
+ /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+ /// null/false. When the first accessing function is noticed, it is recorded.
+ /// When a second different accessing function is noticed,
+ /// HasMultipleAccessingFunctions is set to true.
+ const Function *AccessingFunction;
+ bool HasMultipleAccessingFunctions;
+
+ /// HasNonInstructionUser - Set to true if this global has a user that is not
+ /// an instruction (e.g. a constant expr or GV initializer).
+ bool HasNonInstructionUser;
+
+ /// HasPHIUser - Set to true if this global has a user that is a PHI node.
+ bool HasPHIUser;
+
+ GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ HasPHIUser(false) {}
+};
+
+}
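+
+// Worked example (ours): for an
+//   @x = internal global i32 0
+// that is loaded in several functions but stored to exactly once with a value
+// V other than its initializer, AnalyzeGlobal below ends up with
+// isLoaded = true, StoredType = isStoredOnce, StoredOnceValue = V, and
+// HasMultipleAccessingFunctions = true, while isCompared and HasPHIUser stay
+// false (assuming no comparisons or PHI uses of @x).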
+
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+// by constants itself. Note that constants cannot be cyclic, so this test is
+// pretty easy to implement recursively.
+//
+static bool SafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!SafeToDestroyConstant(CU)) return false;
+ } else
+ return false;
+ return true;
+}
+
+
+/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
+/// structure. If the global has its address taken, return true to indicate we
+/// can't do anything with it.
+///
+static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode*, 16> &PHIUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType())) return true;
+
+ if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.isLoaded = true;
+ if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V) return true;
+
+ if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::isStored) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+ SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
+ GS.StoredType = GlobalStatus::isStoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ }
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<SelectInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PHIUsers.insert(PN)) // Not already visited.
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ GS.HasPHIUser = true;
+ } else if (isa<CmpInst>(I)) {
+ GS.isCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile()) return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::isStored;
+ if (MTI->getArgOperand(1) == V)
+ GS.isLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile()) return true;
+ GS.StoredType = GlobalStatus::isStored;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!SafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI) return 0;
+ unsigned IdxV = CI->getZExtValue();
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
+ if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
+ if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
+ if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
+ } else if (isa<ConstantAggregateZero>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return Constant::getNullValue(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return Constant::getNullValue(STy->getElementType());
+ }
+ } else if (isa<UndefValue>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return UndefValue::get(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return UndefValue::get(STy->getElementType());
+ }
+ }
+ return 0;
+}
+
+
+/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
+/// users of the global, cleaning up the obvious ones. This is largely just a
+/// quick scan over the use list to clean up the easy and obvious cruft. This
+/// returns true if it made a change.
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
+ User *U = *UI++;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = 0;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = 0;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE =
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ CleanupConstantGlobalUsers(V, Init);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isSafeSROAElementUse - Return true if the specified instruction is a safe
+/// user of a derived expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return SafeToDestroyConstant(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (GEPI == 0) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
+ I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+
+/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
+/// Look at it and its uses and decide whether it is safe to SROA this global.
+///
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E;
+ ++GEPI) {
+ uint64_t NumElements;
+ if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert((*GEPI)->isStructTy() &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
+/// is safe for us to perform this transformation.
+///
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ return false;
+ }
+ return true;
+}
+
+
+/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
+/// variable. This opens the door for other optimizations by exposing the
+/// behavior of the program in a more fine-grained way. We have determined that
+/// this transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return 0;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ const Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return 0; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return 0;
+
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->use_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+ // Ignore operand 1 (the first index), which has to be zero or else the
+ // program is quite broken (undefined). Get operand 2, which is the
+ // structure or array index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
+ &Idxs[0], Idxs.size());
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ GEPI->getName()+"."+Twine(Val),GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+}
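+
+// Illustration (ours): running this on
+//   @s = internal global { i32, i32 } zeroinitializer
+// splits it into two scalar globals @s.0 and @s.1, and rewrites users of the
+// form 'getelementptr @s, 0, 1, ...' to address @s.1 directly (forming a
+// shorter GEP when trailing indices remain).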
+
+/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
+/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// phi nodes we've seen to avoid reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+
+ if (isa<LoadInst>(U)) {
+ // Will trap.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Storing the value.
+ }
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ if (II->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ return false;
+ } else if (isa<ICmpInst>(U) &&
+ isa<ConstantPointerNull>(UI->getOperand(1))) {
+ // Ignore icmp X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
+/// from GV will trap if the loaded value is null. Note that this also permits
+/// comparisons of the loaded value against null, as a special case.
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(U)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ CS.setCalledFunction(NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
+ PassedAsArg = true;
+ CS.setArgument(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->use_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
+ ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+ Idxs.size()));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
+/// value stored into it. If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that they cannot be
+/// reachable with a null value, so we can optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+ // Replace all uses of loads with uses of uses of the stored value.
+ for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+ assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+ "Only expect load and stores!");
+ }
+ }
+
+ if (Changed) {
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ CleanupConstantGlobalUsers(GV, 0);
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and rewrite any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ ConstantInt *NElements,
+ TargetData* TD) {
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+
+ const Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is of an array.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+ // Create the new global variable. The contents of the malloc'd memory are
+ // undefined, so initialize with an undef value.
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
+ GV->getName()+".body",
+ GV,
+ GV->isThreadLocal());
+
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+ // Now the GV is dead, nuke it and the malloc..
+ GV->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue);
+
+ return NewGV;
+}
+
+/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
+/// to make sure that there are no complex uses of V. We permit simple things
+/// like dereferencing the pointer, but not storing through the address, unless
+/// it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ const Instruction *Inst = cast<Instruction>(*UI);
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere. Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer. Further, delete the store into
+/// GV. This assumes that these values pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on. This permits GEP's
+/// that index through the array and struct field, icmps of null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ // We permit two users of the load: setcc comparing against the null
+ // pointer, and a getelementptr of a specific form.
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+
+ // Comparison against null is ok.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN))
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN))
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+ Instruction *StoredVal) {
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all inputs to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIs that can be
+ // transformed, loads from GV, or the stored value (StoredVal) itself.
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
+ , E = LoadUsingPHIs.end(); I != E; ++I) {
+ const PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == StoredVal) continue;
+
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
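+// Orientation note (ours; the name @g is illustrative): after heap SRA, a
+// global @g that held the result of
+//   malloc(n * sizeof({ i32, i32* }))
+// is replaced by per-field globals @g.f0 and @g.f1, each pointing at its own
+// malloc'd array of that field's type; the helpers below lazily materialize
+// the per-field loads and PHIs that the rewritten users need.
+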
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f"+Twine(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ const StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ PHINode *NewPN =
+ PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getNumIncomingValues(),
+ PN->getName()+".f"+Twine(FieldNo), PN);
+ Result = NewPN;
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ llvm_unreachable("Unknown usable value");
+ Result = 0;
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
+/// the load, rewrite the derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+ GEPIdx.begin(), GEPIdx.end(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ bool Inserted;
+ DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+ tie(InsertPos, Inserted) =
+ InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+ if (!Inserted) return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
+/// is a value loaded from the global. Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead. All uses of loaded values satisfy
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value* NElems, TargetData *TD) {
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ const Type* MAT = getMallocAllocatedType(CI);
+ const StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ const Type *FieldTy = STy->getElementType(FieldNo);
+ const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + Twine(FieldNo), GV,
+ GV->isThreadLocal());
+ FieldGlobals.push_back(NGV);
+
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems, 0,
+ CI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, CI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
+ ConstantZero, "isneg");
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()),
+ "tmp");
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
+ OrigBB->getParent());
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ CallInst::CreateFree(GVVal, BI);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI is no longer needed, remove it.
+ CI->eraseFromParent();
+
+ /// InsertedScalarizedValues - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of what scalarized values are
+ /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored into it that is a
+/// malloc or a cast of a malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ if (!TD)
+ return false;
+
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+ // We can't optimize this if the malloc itself is used in a complex way,
+ // for example, being stored into multiple globals. We allow the malloc to
+ // be stored into the specified global, loaded, setcc'd, and GEP'd: these
+ // are all uses we could rewrite to go through the global instead.
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ Value *NElems = getMallocArraySize(CI, TD, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // If the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the malloc into a malloc of
+ // structs: malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ 0, CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ return true;
+ }
+
+ return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+ const Type* MallocType = getMallocAllocatedType(CI);
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ GVI, TD))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal. See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used. This exposes the values to other scalar optimizations.
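+///
+/// As an illustrative sketch (the values are made up): if @G is initialized
+/// to 0 and the only other value ever stored is 10, @G becomes an i1 global
+/// @G.b; stores become stores of true/false, and each load becomes
+///   %b = load i1* @G.b
+///   %v = select i1 %b, i32 10, i32 0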
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ const Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
+ return false;
+
+ // Walk the use list of the global, checking that all the uses are loads or
+ // stores. If there is anything else, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".b",
+ GV->isThreadLocal());
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
+ "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->use_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal)
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
+ else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+ // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ GV->eraseFromParent();
+ return true;
+}
+
+
+/// ProcessGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ if (!GV->hasLocalLinkage())
+ return false;
+
+ // Do more involved optimizations if the global is internal.
+ GV->removeDeadConstantUsers();
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
+ GV->eraseFromParent();
+ ++NumDeleted;
+ return true;
+ }
+
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+
+ if (AnalyzeGlobal(GV, GS, PHIUsers))
+ return false;
+
+ if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ GV->setUnnamedAddr(true);
+ NumUnnamed++;
+ }
+
+ if (GV->isConstant() || !GV->hasInitializer())
+ return false;
+
+ return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS) {
+ // If this is a first class global and has only one accessing function, and
+ // that function is main (which we know is not recursive), we can replace
+ // the global with a local alloca in that function.
+ //
+ // NOTE: It doesn't make sense to promote non-single-value types since we
+ // are just replacing static memory with stack memory.
+ //
+ // If the global is in a different address space, don't bring it to the
+ // stack.
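+ //
+ // As an illustrative sketch (the names are made up): @G = internal global
+ // i32 5, accessed only from main, becomes an 'alloca i32' in main's entry
+ // block together with a store of 5 that re-creates the initializer.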
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysisIfAvailable<TargetData>()))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+ continue;
+
+ // There can be only one.
+ return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getAttributes()));
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setAttributes(StripNest(User.getAttributes()));
+ }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration())
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->removeDeadConstantUsers();
+ if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ F->eraseFromParent();
+ Changed = true;
+ ++NumFnDeleted;
+ } else if (F->hasLocalLinkage()) {
+ if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has C calling conventions, is not a varargs
+ // function, and is only called directly, promote it to use the Fast
+ // calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+
+ Changed |= ProcessGlobal(GV, GVI);
+ }
+ return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
+/// initializers have an init priority of 65535.
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (GV == 0) return 0;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer()) return 0;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return 0;
+ }
+
+ return GV;
+}
+
+/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function*>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function*> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// InstallGlobalCtors - Given the llvm.global_ctors list and an updated list
+/// of constructors, install the new list, returning the new global to use.
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function*> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ Constant *CSVals[2];
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+ CSVals[1] = 0;
+
+ const StructType *StructTy =
+ cast <StructType>(
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+
+ // Create the new init list.
+ std::vector<Constant*> CAList;
+ for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
+ if (Ctors[i]) {
+ CSVals[1] = Ctors[i];
+ } else {
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
+ const PointerType *PFTy = PointerType::getUnqual(FTy);
+ CSVals[1] = Constant::getNullValue(PFTy);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 0x7fffffff);
+ }
+ CAList.push_back(ConstantStruct::get(StructTy, CSVals));
+ }
+
+ // Create the array initializer.
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return GCL;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GCL->getLinkage(), CA, "",
+ GCL->isThreadLocal());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+
+ if (Ctors.size())
+ return NGV;
+ else
+ return 0;
+}
+
+
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ComputedValues[V];
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator. We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // Simple integer, undef, constant aggregate zero, global addresses, etc are
+ // all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+ isa<GlobalValue>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Constant *Op = cast<Constant>(C->getOperand(i));
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ return false;
+ }
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // These casts are always fine if the casted value is.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C)) return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+}
+
+
+/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
+/// enough for us to understand. In particular, if it is a cast to anything
+/// other than from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
+/// globals. This should be kept up to date with CommitValueTo.
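+///
+/// As an illustrative sketch (the global name is made up): a plain global
+/// address such as @G, or an in-bounds constantexpr GEP like
+///   getelementptr inbounds ([4 x i32]* @G, i32 0, i32 2)
+/// is simple enough; an address formed with e.g. inttoptr is not.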
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
+/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
+/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ std::vector<Constant*> Elts;
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+
+ // Break up the constant into its elements.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
+ } else if (isa<UndefValue>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(UndefValue::get(STy->getElementType(i)));
+ } else {
+ llvm_unreachable("This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ }
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(STy, Elts);
+ }
+
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+
+ uint64_t NumElts;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+ // Break up the array into elements.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+ for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+ } else {
+ assert(isa<UndefValue>(Init) && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+ }
+
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantVector::get(Elts);
+}
+
+/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
+}
+
+/// ComputeLoadResult - Return the value that would be computed by a load from
+/// P after the stores reflected by 'Memory' have been performed. If we can't
+/// decide, return null.
+static Constant *ComputeLoadResult(Constant *P,
+ const DenseMap<Constant*, Constant*> &Memory) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
+ if (I != Memory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return 0;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return 0; // don't know how to evaluate.
+}
+
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the constant
+/// values to use for the function's formal arguments.
+static bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs,
+ std::vector<Function*> &CallStack,
+ DenseMap<Constant*, Constant*> &MutatedMemory,
+ std::vector<GlobalVariable*> &AllocaTmps,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ /// Values - As we compute SSA register values, we store their contents here.
+ DenseMap<Value*, Constant*> Values;
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ Values[AI] = ActualArgs[ArgNo];
+
+ /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ /// we can only evaluate any one basic block at most once. This set keeps
+ /// track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurInst - The current instruction we're evaluating.
+ BasicBlock::iterator CurInst = F->begin()->begin();
+
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = 0;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (SI->isVolatile()) return false; // no volatile accesses.
+ Constant *Ptr = getVal(Values, SI->getOperand(1));
+ if (!isSimpleEnoughPointerToCommit(Ptr))
+ // If this is too complex for us to commit, reject it.
+ return false;
+
+ Constant *Val = getVal(Values, SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from Val's type to NewTy must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(Values, BO->getOperand(0)),
+ getVal(Values, BO->getOperand(1)));
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(Values, CI->getOperand(0)),
+ getVal(Values, CI->getOperand(1)));
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(Values, CI->getOperand(0)),
+ CI->getType());
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ getVal(Values, SI->getOperand(1)),
+ getVal(Values, SI->getOperand(2)));
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(Values, GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(Values, *i));
+ InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
+ ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+ if (LI->isVolatile()) return false; // no volatile accesses.
+ InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
+ MutatedMemory);
+ if (InstResult == 0) return false; // Could not evaluate load.
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ const Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(Ty),
+ AI->getName()));
+ InstResult = AllocaTmps.back();
+ } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CI)) {
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CI->getCalledValue())) return false;
+
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) {
+ if (MSI->isVolatile()) return false;
+ Constant *Ptr = getVal(Values, MSI->getDest());
+ Constant *Val = getVal(Values, MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr),
+ MutatedMemory);
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ ++CurInst;
+ continue;
+ }
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(Values,
+ CI->getCalledValue()));
+ if (!Callee) return false; // Cannot resolve.
+
+ SmallVector<Constant*, 8> Formals;
+ CallSite CS(CI);
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i)
+ Formals.push_back(getVal(Values, *i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
+ Formals.size())) {
+ InstResult = C;
+ } else {
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg())
+ return false;
+
+ Constant *RetVal;
+ // Execute the call, if successful, use the return value.
+ if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
+ MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ return false;
+ InstResult = RetVal;
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ BasicBlock *NewBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NewBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NewBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
+ if (RI->getNumOperands())
+ RetVal = getVal(Values, RI->getOperand(0));
+
+ CallStack.pop_back(); // return from fn.
+ return true; // We succeeded at evaluating this ctor!
+ } else {
+ // invoke, unwind, unreachable.
+ return false; // Cannot handle this terminator.
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NewBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ BasicBlock *OldBB = CurInst->getParent();
+ CurInst = NewBB->begin();
+ PHINode *PN;
+ for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
+
+ // Do NOT increment CurInst. We know that the terminator had no value.
+ continue;
+ } else {
+ // Did not know how to evaluate this!
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, TD);
+
+ Values[CurInst] = InstResult;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function,
+/// if we can. Return true if evaluation succeeded, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ std::vector<GlobalVariable*> AllocaTmps;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ std::vector<Function*> CallStack;
+
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ // Call the function.
+ Constant *RetValDummy;
+ bool EvalSuccess = EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>(), CallStack,
+ MutatedMemory, AllocaTmps,
+ SimpleConstants, TD);
+
+ if (EvalSuccess) {
+ // We succeeded at evaluation: commit the result.
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << MutatedMemory.size()
+ << " stores.\n");
+ for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+ E = MutatedMemory.end(); I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ }
+
+ // At this point, we are done interpreting. If we created any 'alloca'
+ // temporaries, release them now.
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+
+ return EvalSuccess;
+}
+
+
+
+/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
+/// Return true if anything changed.
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
+ std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
+ bool MadeChange = false;
+ if (Ctors.empty()) return false;
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list, prune off the rest of
+ // the list.
+ if (F == 0) {
+ if (i != Ctors.size()-1) {
+ Ctors.resize(i+1);
+ MadeChange = true;
+ }
+ break;
+ }
+
+ // We cannot simplify external ctor functions.
+ if (F->empty()) continue;
+
+ // If we can evaluate the ctor at compile time, do.
+ if (EvaluateStaticConstructor(F, TD)) {
+ Ctors.erase(Ctors.begin()+i);
+ MadeChange = true;
+ --i;
+ ++NumCtorsEvaluated;
+ continue;
+ }
+ }
+
+ if (!MadeChange) return false;
+
+ GCL = InstallGlobalCtors(GCL, Ctors);
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator J = I++;
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration())
+ J->setLinkage(GlobalValue::InternalLinkage);
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ Target->removeDeadConstantUsers();
+ bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+
+ // Make all users of the alias use the aliasee instead.
+ if (!J->use_empty()) {
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ }
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!J->hasLocalLinkage()) {
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
+
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+ }
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+static Function *FindCXAAtExit(Module &M) {
+ Function *Fn = M.getFunction("__cxa_atexit");
+
+ if (!Fn)
+ return 0;
+
+ const FunctionType *FTy = Fn->getFunctionType();
+
+ // Checking that the function has the right return type, the right number of
+ // parameters and that they all have pointer types should be enough.
+ if (!FTy->getReturnType()->isIntegerTy() ||
+ FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return 0;
+
+ return Fn;
+}
+
+/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
+/// destructor and can therefore be eliminated.
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret' or a 'call' to an empty C++ dtor.
+static bool cxxDtorIsEmpty(const Function &Fn,
+ SmallPtrSet<const Function *, 8> &CalledFunctions) {
+ // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+ // nounwind, but that doesn't seem worth doing.
+ if (Fn.isDeclaration())
+ return false;
+
+ if (++Fn.begin() != Fn.end())
+ return false;
+
+ const BasicBlock &EntryBlock = Fn.getEntryBlock();
+ for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+ I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore debug intrinsics.
+ if (isa<DbgInfoIntrinsic>(CI))
+ continue;
+
+ const Function *CalledFn = CI->getCalledFunction();
+
+ if (!CalledFn)
+ return false;
+
+ SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+ // Don't treat recursive functions as empty.
+ if (!NewCalledFunctions.insert(CalledFn))
+ return false;
+
+ if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+ return false;
+ } else if (isa<ReturnInst>(*I))
+ return true;
+ else
+ return false;
+ }
+
+ return false;
+}
+
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+ /// Itanium C++ ABI p3.3.5:
+ ///
+ /// After constructing a global (or local static) object, that will require
+ /// destruction on exit, a termination function is registered as follows:
+ ///
+ /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+ ///
+ /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+ /// call f(p) when DSO d is unloaded, before all such termination calls
+ /// registered before this one. It returns zero if registration is
+ /// successful, nonzero on failure.
+
+ // This pass will look for calls to __cxa_atexit where the function is trivial
+ // and remove them.
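+ //
+ // As an illustrative sketch (the C++ names are made up): for
+ //   struct S { ~S() {} };
+ //   S TheS;
+ // the front end registers S's destructor via __cxa_atexit; since the
+ // destructor is empty, that registration call can be deleted.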
+ bool Changed = false;
+
+ for (Function::use_iterator I = CXAAtExitFn->use_begin(),
+ E = CXAAtExitFn->use_end(); I != E;) {
+ // We're only interested in calls. Theoretically, we could handle invoke
+ // instructions as well, but neither llvm-gcc nor clang generate invokes
+ // to __cxa_atexit.
+ CallInst *CI = dyn_cast<CallInst>(*I++);
+ if (!CI)
+ continue;
+
+ Function *DtorFn =
+ dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+ if (!DtorFn)
+ continue;
+
+ SmallPtrSet<const Function *, 8> CalledFunctions;
+ if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ continue;
+
+ // Just remove the call.
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ CI->eraseFromParent();
+
+ ++NumCXXDtorsRemoved;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Try to find the llvm.global_ctors list.
+ GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+
+ Function *CXAAtExitFn = FindCXAAtExit(M);
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Delete functions that are trivially dead, ccc -> fastcc
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ if (GlobalCtors)
+ LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+
+ // Try to remove trivial global destructors.
+ if (CXAAtExitFn)
+ LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 000000000000..25c01346642b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,279 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ipconstprop"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false)
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Delete any klingons.
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
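+/// As an illustrative sketch (the function and value names are made up): if
+/// every call site of an internal function @f looks like
+///   call void @f(i32 7, i32 %x)
+/// then all uses of @f's first argument are replaced with the constant 7,
+/// leaving the now-dead argument for deadargelim to remove.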
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+ if (F.arg_empty() || F.use_empty()) return false; // No args or uses? Early exit.
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is set to true once the argument is found to be
+ // non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(U)) continue;
+
+ // If the user is not a call or invoke instruction, or if the function is
+ // not used as the callee, do not transform.
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return false;
+
+ CallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
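+//
+// Illustrative sketch (hypothetical IR): if every 'ret' in @g returns the
+// constant 42, then at a call site such as
+//
+//   %r = call i32 @g()
+//
+// all uses of %r are rewritten to use 42 directly; the call and the returns
+// themselves are left in place for later cleanup.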
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType()->isVoidTy())
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(0);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+        // All values non-constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(UI))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 000000000000..31ce95f53d33
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,112 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerSetJmpPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSetJmpPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+}
+
+
+void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
+ // FIXME: Remove in LLVM 3.0.
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
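+
+// Example usage of these C bindings (a minimal sketch; assumes a module
+// reference 'Mod' obtained elsewhere via the llvm-c API):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddFunctionInliningPass(PM);
+//   LLVMAddGlobalDCEPass(PM);
+//   LLVMRunPassManager(PM, Mod);
+//   LLVMDisposePassManager(PM);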
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 000000000000..ce795b72438d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,85 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+  // AlwaysInliner only inlines functions that are marked as "always inline".
+ class AlwaysInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+// doInitialization - Initializes the set of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
+ NeverInline.insert(I);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 000000000000..0c5b3be8f983
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,118 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+ class SimpleInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char SimpleInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+// doInitialization - Initializes the set of functions that have been
+// annotated with the noinline attribute.
+bool SimpleInliner::doInitialization(CallGraph &CG) {
+
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
+ NeverInline.insert(I);
+
+ // Get llvm.noinline
+ GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
+
+ if (GV == 0)
+ return false;
+
+ // Don't crash on invalid code
+ if (!GV->hasDefinitiveInitializer())
+ return false;
+
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+ if (InitList == 0)
+ return false;
+
+ // Iterate over each element and add to the NeverInline set
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+ // Get Source
+ const Constant *Elt = InitList->getOperand(i);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
+ if (CE->getOpcode() == Instruction::BitCast)
+ Elt = CE->getOperand(0);
+
+ // Insert into set of functions to never inline
+ if (const Function *F = dyn_cast<Function>(Elt))
+ NeverInline.insert(F);
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 000000000000..57f3e772b569
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,571 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls and updating the call graph. The decisions of which calls
+// are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// Threshold to use when optsize is specified (and no -inline-threshold is given).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+
+Inliner::Inliner(char &ID, int Threshold)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
+ CallGraphSCCPass::getAnalysisUsage(Info);
+}
+
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI))
+ return false;
+
+ // If the inlined function had a higher stack protection level than the
+ // calling function, then bump up the caller's stack protection level.
+ if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+ !Caller->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtect);
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = 0;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+ int thres = InlineThreshold;
+
+  // Listen to the optsize attribute when -inline-threshold is not given.
+ Function *Caller = CS.getCaller();
+ if (Caller && !Caller->isDeclaration() &&
+ Caller->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineLimit.getNumOccurrences() == 0)
+ thres = OptSizeThreshold;
+
+ // Listen to inlinehint when it would increase the threshold.
+ Function *Callee = CS.getCalledFunction();
+ if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
+ Callee->hasFnAttr(Attribute::InlineHint))
+ thres = HintThreshold;
+
+ return thres;
+}
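+
+// For example, with the defaults above (-inline-threshold=225,
+// -inlinehint-threshold=325, OptSizeThreshold=75): a call from a caller
+// marked optsize is measured against 75, a call to a callee marked
+// inlinehint against 325, and everything else against 225.  When both
+// attributes apply, the hint threshold wins, since it is only used when it
+// raises the threshold.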
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ int Cost = IC.getValue();
+ Function *Caller = CS.getCaller();
+ int CurrentThreshold = getInlineThreshold(CS);
+ float FudgeFactor = getInlineFudgeFactor(CS);
+ int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
+ if (Cost >= AdjThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ // Try to detect the case where the current inlining candidate caller
+ // (call it B) is a static function and is an inlining candidate elsewhere,
+ // and the current candidate callee (call it C) is large enough that
+ // inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B
+ // into its callers.
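+  //
+  // Worked example (hypothetical numbers): suppose inlining C into B costs
+  // 300.  If doing so would push B past the threshold at outer call sites
+  // whose combined cost is only 200, then TotalSecondaryCost (200) < Cost
+  // (300) and we decline to inline C here, preferring to keep B inlinable.
+  // If the blocked outer sites instead total 400, inlining C is the better
+  // trade and we proceed.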
+ if (Caller->hasLocalLinkage()) {
+ int TotalSecondaryCost = 0;
+ bool outerCallsFound = false;
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = true;
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ if (IC2.isNever())
+ callerWillBeRemoved = false;
+ if (IC2.isAlways() || IC2.isNever())
+ continue;
+
+ outerCallsFound = true;
+ int Cost2 = IC2.getValue();
+ int CurrentThreshold2 = getInlineThreshold(CS2);
+ float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+ if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+ callerWillBeRemoved = false;
+
+ // See if we have this case. We subtract off the penalty
+ // for the call instruction, which we would be deleting.
+ if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+ Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
+ (int)(CurrentThreshold2 * FudgeFactor2)) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += Cost2;
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (outerCallsFound && inliningPreventsSomeOuterInline &&
+ TotalSecondaryCost < Cost) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << Cost <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << " Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << '\n');
+ return true;
+}
+
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DEBUG(dbgs() << "Inliner visiting SCC:");
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
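+  //
+  // For instance (hypothetical sequence): inlining callee B at a top-level
+  // call site (history index -1) appends (B, -1) at index 0, and any call
+  // sites exposed by that inline carry history index 0; inlining C through
+  // one of those appends (C, 0) at index 1.  Walking 1 -> 0 -> -1 in
+  // InlineHistoryIncludes recovers the chain of functions already inlined
+  // through a given call site.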
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F) continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (!CS || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ CallSites.push_back(std::make_pair(CS, -1));
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // If there are no calls in this function, exit early.
+ if (CallSites.empty())
+ return false;
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].first.getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, TD);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+    // Keep iterating the outer do/while loop because inlining functions can
+    // cause indirect calls to become direct calls, exposing new candidates.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi].first;
+
+ Function *Caller = CS.getCaller();
+ Function *Callee = CS.getCalledFunction();
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(dbgs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ // Update the cached cost info with the missing call
+ growCachedCostInfo(Caller, NULL);
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+        // If this call site was obtained by inlining another function, verify
+        // that the inline history for the call site does not already include
+        // the callee itself.  If it does, we'd be recursively inlining the
+        // same function, which would expose the same call sites again and
+        // cause us to inline infinitely.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
+
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID))
+ continue;
+ ++NumInlined;
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.InlinedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+
+ // Update the cached cost info with the inlined call.
+ growCachedCostInfo(Caller, Callee);
+ }
+
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet, this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(dbgs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(Callee);
+
+        // Remove the callee's node from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.isSingular()) {
+ CallSites[CSi] = CallSites.back();
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ Changed = true;
+ LocalChange = true;
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// the DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG,
+ SmallPtrSet<const Function *, 16> *DNR) {
+ SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+ CallGraphNode *CGN = I->second;
+ if (CGN->getFunction() == 0)
+ continue;
+
+ Function *F = CGN->getFunction();
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (DNR && DNR->count(F))
+ continue;
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ continue;
+ if (!F->use_empty())
+ continue;
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+    // node.  These edges might have been made irrelevant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Mark the node to be removed from the call graph and deleted below.
+ FunctionsToRemove.insert(CGN);
+ }
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ //
+  // Note that it is fine to iterate over a non-stable set here, since the
+  // order in which the functions are deleted does not matter.
+ bool Changed = false;
+ for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end(); I != E; ++I) {
+ resetCachedCostInfo((*I)->getFunction());
+ delete CG.removeFunctionFromModule(*I);
+ ++NumDeleted;
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 000000000000..7cb1d18f933d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,191 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for a
+// main function. If a main function is found, all other functions and all
+// global variables with initializers are marked as internal.
+//
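+// For example (hypothetical module): given definitions of @main, @helper, and
+// a global @table, this pass keeps @main external and gives @helper and
+// @table internal linkage, unless their names are preserved via
+// -internalize-public-api-list or -internalize-public-api-file.
+//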
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "internalize"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+ /// If no api symbols were specified and a main function is defined,
+ /// assume the main function is the only API
+ bool AllButMain;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass(bool AllButMain = true);
+ explicit InternalizePass(const std::vector <const char *>& exportList);
+ void LoadFile(const char *Filename);
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraph>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass(bool AllButMain)
+ : ModulePass(ID), AllButMain(AllButMain){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ if (!APIList.empty()) // If a list is specified, use it as well.
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+ : ModulePass(ID), AllButMain(false){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ itr != exportList.end(); itr++) {
+ ExternalNames.insert(*itr);
+ }
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+
+ if (ExternalNames.empty()) {
+ // Return if we're not in 'all but main' mode and have no external api
+ if (!AllButMain)
+ return false;
+ // If no list or file of symbols was specified, check to see if there is a
+ // "main" symbol defined in the module. If so, use it, otherwise do not
+ // internalize the module, it must be a library or something.
+ //
+ Function *MainFunc = M.getFunction("main");
+ if (MainFunc == 0 || MainFunc->isDeclaration())
+ return false; // No main found, must be a library...
+
+ // Preserve main, internalize all else.
+ ExternalNames.insert(MainFunc->getName());
+ }
+
+ bool Changed = false;
+
+ // Mark all functions not in the api as internal.
+ // FIXME: maybe use private linkage?
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && // Function must be defined here
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !I->hasLocalLinkage() && // Can't already have internal linkage
+ !ExternalNames.count(I->getName())) {// Not marked to keep external?
+ I->setLinkage(GlobalValue::InternalLinkage);
+ // Remove a callgraph edge from the external node to this function.
+ if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
+ ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.noinline");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Mark all global variables with initializers that are not in the api as
+ // internal as well.
+ // FIXME: maybe use private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasLocalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
+
+ // Mark all aliases that are not in the api as internal as well.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasInternalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createInternalizePass(bool AllButMain) {
+ return new InternalizePass(AllButMain);
+}
+
+ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ return new InternalizePass(el);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 000000000000..848944dc9381
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,248 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This is a pass most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-extract"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ struct LoopExtractor : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false)
+
+// createLoopExtractorPass - This pass extracts all natural loops from the
+// program into a function if it can.
+//
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only visit top-level loops.
+ if (L->getParentLoop())
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader())
+ ShouldExtractLoop = true;
+ else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ if (ExtractLoop(DT, L) != 0) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LPM.deleteLoopFromQueue(L);
+ }
+ ++NumExtracted;
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ BlockExtractorPass() : ModulePass(ID) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass()
+{
+ return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+ // Map the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+ // There's no way to find BBs by name without looking at every BB inside
+ // every Function. Fortunately, this is always empty except when used by
+ // bugpoint in which case correctness is more important than performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(BB))
+ BlocksToExtract.push_back(BB);
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
+ ExtractBasicBlock(BlocksToExtract[i]);
+
+ return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
new file mode 100644
index 000000000000..659476b139e4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -0,0 +1,547 @@
+//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of setjmp and longjmp to use the
+// LLVM invoke and unwind instructions as necessary.
+//
+// Lowering of longjmp is fairly trivial. We replace the call with a
+// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
+// This unwinds the stack for us, calling all of the destructors for
+// objects allocated on the stack.
+//
+// At a setjmp call, the basic block is split and the setjmp removed.
+// The calls in a function that contains a setjmp are converted to
+// invokes whose exception edge checks whether the in-flight exception
+// is a longjmp and, if so, whether it is handled in this function. If
+// it is, the handler gets the value returned by the longjmp and control
+// continues where the basic block was split. Invoke instructions are
+// handled in a similar fashion, with the original exception block being
+// executed only if the exception is not a longjmp handled by this
+// function.
+//
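+// Illustrative sketch (hypothetical IR): a call that is reachable from a
+// setjmp in the same function, e.g.
+//
+//   %r = call i32 @foo()
+//
+// is rewritten into an invoke of @foo whose unwind destination uses the
+// __llvm_sjljeh_* runtime helpers to ask whether the in-flight exception is
+// a longjmp handled here, and if so dispatches to the block that was split
+// off at the corresponding setjmp call.
+//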
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FIXME: This pass doesn't deal with PHI statements just yet. That is,
+// we expect this to occur before SSAification is done. This would seem
+// to make sense, but in general, it might be a good idea to make this
+// pass invokable via the "opt" command at will.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersetjmp"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
+STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
+STATISTIC(CallsTransformed , "Number of calls invokified");
+STATISTIC(InvokesTransformed , "Number of invokes modified");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // LowerSetJmp pass implementation.
+ class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
+ // LLVM library functions...
+ Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
+ Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
+ Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
+ Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
+ Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
+ Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
+ Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
+
+ typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
+
+ // Keep track of those basic blocks reachable via a depth-first search of
+ // the CFG from a setjmp call. We only need to transform those "call" and
+ // "invoke" instructions that are reachable from the setjmp call site.
+ std::set<BasicBlock*> DFSBlocks;
+
+ // The setjmp map is going to hold information about which setjmps
+ // were called (each setjmp gets its own number) and with which
+ // buffer it was called.
+ std::map<Function*, AllocaInst*> SJMap;
+
+ // The rethrow basic block map holds the basic block to branch to if
+ // the exception isn't handled in the current function and needs to
+ // be rethrown.
+ std::map<const Function*, BasicBlock*> RethrowBBMap;
+
+ // The preliminary basic block map holds a basic block that grabs the
+ // exception and determines if it's handled by the current function.
+ std::map<const Function*, BasicBlock*> PrelimBBMap;
+
+ // The switch/value map holds a switch inst/call inst pair. The
+ // switch inst controls which handler (if any) gets called and the
+ // value is the value returned to that handler by the call to
+ // __llvm_sjljeh_get_longjmp_value.
+ std::map<const Function*, SwitchValuePair> SwitchValMap;
+
+ // A map of which setjmps we've seen so far in a function.
+ std::map<const Function*, unsigned> SetJmpIDMap;
+
+ AllocaInst* GetSetJmpMap(Function* Func);
+ BasicBlock* GetRethrowBB(Function* Func);
+ SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
+
+ void TransformLongJmpCall(CallInst* Inst);
+ void TransformSetJmpCall(CallInst* Inst);
+
+ bool IsTransformableFunction(StringRef Name);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSetJmp() : ModulePass(ID) {
+ initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+ }
+
+ void visitCallInst(CallInst& CI);
+ void visitInvokeInst(InvokeInst& II);
+ void visitReturnInst(ReturnInst& RI);
+ void visitUnwindInst(UnwindInst& UI);
+
+ bool runOnModule(Module& M);
+ bool doInitialization(Module& M);
+ };
+} // end anonymous namespace
+
+char LowerSetJmp::ID = 0;
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
+
+// runOnModule - Run the transformation on the program. We grab the function
+// prototypes for longjmp and setjmp. If they are used in the program,
+// then we can go directly to the places they're at and transform them.
+bool LowerSetJmp::runOnModule(Module& M) {
+ bool Changed = false;
+
+ // These are what the functions are called.
+ Function* SetJmp = M.getFunction("llvm.setjmp");
+ Function* LongJmp = M.getFunction("llvm.longjmp");
+
+ // This program doesn't have longjmp and setjmp calls.
+ if ((!LongJmp || LongJmp->use_empty()) &&
+ (!SetJmp || SetJmp->use_empty())) return false;
+
+ // Initialize some values and functions we'll need to transform the
+ // setjmp/longjmp functions.
+ doInitialization(M);
+
+ if (SetJmp) {
+ for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
+ B != E; ++B) {
+ BasicBlock* BB = cast<Instruction>(*B)->getParent();
+ for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
+ E = df_ext_end(BB, DFSBlocks); I != E; ++I)
+ /* empty */;
+ }
+
+ while (!SetJmp->use_empty()) {
+ assert(isa<CallInst>(SetJmp->use_back()) &&
+ "User of setjmp intrinsic not a call?");
+ TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
+ Changed = true;
+ }
+ }
+
+ if (LongJmp)
+ while (!LongJmp->use_empty()) {
+ assert(isa<CallInst>(LongJmp->use_back()) &&
+ "User of longjmp intrinsic not a call?");
+ TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
+ Changed = true;
+ }
+
+ // Now go through the affected functions and convert calls and invokes
+ // to new invokes...
+ for (std::map<Function*, AllocaInst*>::iterator
+ B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
+ Function* F = B->first;
+ for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
+ visit(*IB++);
+ if (IB != BB->end() && IB->getParent() != BB)
+ break; // The next instruction got moved to a different block!
+ }
+ }
+
+ DFSBlocks.clear();
+ SJMap.clear();
+ RethrowBBMap.clear();
+ PrelimBBMap.clear();
+ SwitchValMap.clear();
+ SetJmpIDMap.clear();
+
+ return Changed;
+}
+
+// doInitialization - For the lower setjmp/longjmp pass, this ensures that a
+// module contains declarations for the intrinsic functions we are going
+// to call to convert longjmp and setjmp calls.
+//
+// This function always returns true, since it always adds the declarations.
+bool LowerSetJmp::doInitialization(Module& M)
+{
+ const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
+ const Type *SBPPTy = PointerType::getUnqual(SBPTy);
+
+ // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
+ // a description of the following library functions.
+
+ // void __llvm_sjljeh_init_setjmpmap(void**)
+ InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+ // void __llvm_sjljeh_destroy_setjmpmap(void**)
+ DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+
+ // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
+ AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // void __llvm_sjljeh_throw_longjmp(int*, int)
+ ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
+ Type::getVoidTy(M.getContext()), SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
+ TryCatchLJ =
+ M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
+ Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
+
+ // bool __llvm_sjljeh_is_longjmp_exception()
+ IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
+ Type::getInt1Ty(M.getContext()),
+ (Type *)0);
+
+ // int __llvm_sjljeh_get_longjmp_value()
+ GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+ return true;
+}
+
+// IsTransformableFunction - Return true if calls to the named function may be
+// transformed. Currently we leave the "llvm.{setjmp,longjmp}" intrinsics and
+// the setjmp/longjmp error handling runtime functions (those beginning with
+// "__llvm_sjljeh_") alone, since the latter don't throw exceptions.
+bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
+ return !Name.startswith("__llvm_sjljeh_");
+}
+
+// TransformLongJmpCall - Transform a longjmp call into a call to the
+// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
+// throwing the exception for us.
+void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
+{
+ const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
+
+ // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
+ // same parameters as "longjmp", except that the buffer is cast to a
+ // char*. It returns "void", so it doesn't need to replace any of
+ // Inst's uses and doesn't get a name.
+ CastInst* CI =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+ Value *Args[] = { CI, Inst->getArgOperand(1) };
+ CallInst::Create(ThrowLongJmp, Args, "", Inst);
+
+ SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
+
+ // If the function has a setjmp call in it (they are transformed first)
+ // we should branch to the basic block that determines if this longjmp
+ // is applicable here. Otherwise, issue an unwind.
+ if (SVP.first)
+ BranchInst::Create(SVP.first->getParent(), Inst);
+ else
+ new UnwindInst(Inst->getContext(), Inst);
+
+ // Remove all insts after the branch/unwind inst. Go from back to front so
+ // that users are usually erased before their definitions, avoiding
+ // replaceAllUsesWith where possible.
+ BasicBlock *BB = Inst->getParent();
+ Instruction *Removed;
+ do {
+ Removed = &BB->back();
+ // If the removed instructions have any users, replace them now.
+ if (!Removed->use_empty())
+ Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
+ Removed->eraseFromParent();
+ } while (Removed != Inst);
+
+ ++LongJmpsTransformed;
+}
+
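+// [Editor's illustration -- not part of the original source.] Roughly: a
+// lowered longjmp becomes
+//     %LJBuf = bitcast <jmp_buf operand> to i8*
+//     call void @__llvm_sjljeh_throw_longjmp(i8* %LJBuf, i32 <value operand>)
+// followed by a branch to the longjmp-dispatch block if the enclosing
+// function contains a setjmp, or by an 'unwind' otherwise; all instructions
+// after it in the block are deleted.
+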
+// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
+// setjmp map. This map is going to hold information about which setjmps
+// were called (each setjmp gets its own number) and with which buffer it
+// was called. There can be only one!
+AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
+{
+ if (SJMap[Func]) return SJMap[Func];
+
+ // Insert the setjmp map initialization before the first instruction in
+ // the function.
+ Instruction* Inst = Func->getEntryBlock().begin();
+ assert(Inst && "Couldn't find even ONE instruction in entry block!");
+
+ // Fill in the alloca and call to initialize the SJ map.
+ const Type *SBPTy =
+ Type::getInt8PtrTy(Func->getContext());
+ AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
+ CallInst::Create(InitSJMap, Map, "", Inst);
+ return SJMap[Func] = Map;
+}
+
+// GetRethrowBB - Only one rethrow basic block is needed per function.
+// If this is a longjmp exception but not handled in this block, this BB
+// performs the rethrow.
+BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
+{
+ if (RethrowBBMap[Func]) return RethrowBBMap[Func];
+
+ // The basic block we're going to jump to if we need to rethrow the
+ // exception.
+ BasicBlock* Rethrow =
+ BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
+
+ // Fill in the "Rethrow" BB with a call to rethrow the exception. This
+ // is the last instruction in the BB since at this point the runtime
+ // should exit this function and go to the next function.
+ new UnwindInst(Func->getContext(), Rethrow);
+ return RethrowBBMap[Func] = Rethrow;
+}
+
+// GetSJSwitch - Return the switch statement that controls which handler
+// (if any) gets called and the value returned to that handler.
+LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
+ BasicBlock* Rethrow)
+{
+ if (SwitchValMap[Func].first) return SwitchValMap[Func];
+
+ BasicBlock* LongJmpPre =
+ BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
+
+ // Keep track of the preliminary basic block for some of the other
+ // transformations.
+ PrelimBBMap[Func] = LongJmpPre;
+
+ // Grab the exception.
+ CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
+
+ // The "decision basic block" gets the number associated with the
+ // setjmp call returning to switch on and the value returned by
+ // longjmp.
+ BasicBlock* DecisionBB =
+ BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
+
+ BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
+
+ // Fill in the "decision" basic block.
+ CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
+ CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
+ DecisionBB);
+
+ SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
+ return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
+}
+
+// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
+// We're going to convert all setjmp calls to nops. Then all "call" and
+// "invoke" instructions in the function are converted to "invoke" where
+// the "except" branch is used when returning from a longjmp call.
+void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
+{
+ BasicBlock* ABlock = Inst->getParent();
+ Function* Func = ABlock->getParent();
+
+ // Add this setjmp to the setjmp map.
+ const Type* SBPTy =
+ Type::getInt8PtrTy(Inst->getContext());
+ CastInst* BufPtr =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
+ Value *Args[] = {
+ GetSetJmpMap(Func), BufPtr,
+ ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
+ };
+ CallInst::Create(AddSJToMap, Args, "", Inst);
+
+ // We are guaranteed that there are no values live across basic blocks
+ // (because we are "not in SSA form" yet), but there can still be values live
+ // in basic blocks. Because of this, splitting the setjmp block can cause
+ // values above the setjmp to not dominate uses which are after the setjmp
+ // call. For all of these occasions, we must spill the value to the stack.
+ //
+ std::set<Instruction*> InstrsAfterCall;
+
+ // The call is probably very close to the end of the basic block, for the
+ // common usage pattern of: 'if (setjmp(...))', so keep track of the
+ // instructions after the call.
+ for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
+ I != E; ++I)
+ InstrsAfterCall.insert(I);
+
+ for (BasicBlock::iterator II = ABlock->begin();
+ II != BasicBlock::iterator(Inst); ++II)
+ // Loop over all of the uses of instruction. If any of them are after the
+ // call, "spill" the value to the stack.
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != ABlock ||
+ InstrsAfterCall.count(cast<Instruction>(U))) {
+ DemoteRegToStack(*II);
+ break;
+ }
+ }
+ InstrsAfterCall.clear();
+
+ // Change the setjmp call into a branch statement. We'll remove the
+ // setjmp call in a little bit. No worries.
+ BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
+ assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
+
+ SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
+
+ // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
+ DFSBlocks.insert(SetJmpContBlock);
+
+ // This PHI node will be in the new block created from the
+ // splitBasicBlock call.
+ PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
+ "SetJmpReturn", Inst);
+
+ // Coming from a call to setjmp, the return is 0.
+ PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
+ ABlock);
+
+ // Add the case for this setjmp's number...
+ SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
+ SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
+ SetJmpIDMap[Func] - 1),
+ SetJmpContBlock);
+
+ // Value coming from the handling of the exception.
+ PHI->addIncoming(SVP.second, SVP.second->getParent());
+
+ // Replace all uses of this instruction with the PHI node created by
+ // the eradication of setjmp.
+ Inst->replaceAllUsesWith(PHI);
+ Inst->eraseFromParent();
+
+ ++SetJmpsTransformed;
+}
+
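+// [Editor's illustration -- not part of the original source.] After the
+// transformation above, the split point looks roughly like:
+//
+//   ABlock:                              ; original block, setjmp removed
+//     call void @__llvm_sjljeh_add_setjmp_to_map(i8** %SJMap, i8* %buf, i32 N)
+//     br label %ABlockSetJmpCont
+//   ABlockSetJmpCont:
+//     %SetJmpReturn = phi i32 [ 0, %ABlock ], [ %LJVal, %LJDecisionBB ]
+//     ; ... former uses of the setjmp now use %SetJmpReturn ...
+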
+// visitCallInst - This converts all LLVM call instructions into invoke
+// instructions. The except part of the invoke goes to the "LongJmpBlkPre"
+// that grabs the exception and proceeds to determine if it's a longjmp
+// exception or not.
+void LowerSetJmp::visitCallInst(CallInst& CI)
+{
+ if (CI.getCalledFunction())
+ if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
+ CI.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* OldBB = CI.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(OldBB)) return;
+
+ BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
+ assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
+ DFSBlocks.insert(NewBB);
+ NewBB->setName("Call2Invoke");
+
+ Function* Func = OldBB->getParent();
+
+ // Construct the new "invoke" instruction.
+ TerminatorInst* Term = OldBB->getTerminator();
+ CallSite CS(&CI);
+ std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
+ InvokeInst* II =
+ InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
+ Params, CI.getName(), Term);
+ II->setCallingConv(CI.getCallingConv());
+ II->setAttributes(CI.getAttributes());
+
+ // Replace the old call inst with the invoke inst and remove the call.
+ CI.replaceAllUsesWith(II);
+ CI.eraseFromParent();
+
+ // The old terminator is useless now that we have the invoke inst.
+ Term->eraseFromParent();
+ ++CallsTransformed;
+}
+
+// visitInvokeInst - Converting the "invoke" instruction is fairly
+// straightforward. The old exception destination is replaced by a query
+// asking whether this is a longjmp exception. If it is, control goes to the
+// longjmp exception blocks. Otherwise, control is passed to the old exception
+// block.
+void LowerSetJmp::visitInvokeInst(InvokeInst& II)
+{
+ if (II.getCalledFunction())
+ if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
+ II.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* BB = II.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(BB)) return;
+
+ BasicBlock* ExceptBB = II.getUnwindDest();
+
+ Function* Func = BB->getParent();
+ BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
+ "InvokeExcept", Func);
+
+ // If this is a longjmp exception, then branch to the preliminary BB of
+ // the longjmp exception handling. Otherwise, go to the old exception.
+ CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+ NewExceptBB);
+
+ BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
+
+ II.setUnwindDest(NewExceptBB);
+ ++InvokesTransformed;
+}
+
+// visitReturnInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
+ Function* Func = RI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
+}
+
+// visitUnwindInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
+ Function* Func = UI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
+}
+
+ModulePass *llvm::createLowerSetJmpPass() {
+ return new LowerSetJmp();
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 000000000000..7796d05b7bc6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,868 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to look through bitcasts.
+//
+//===----------------------------------------------------------------------===//
+
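+// [Editor's illustration -- not part of the original source.] A hedged sketch
+// with hypothetical functions; since all pointer types are treated as
+// equivalent, these two compare equal and are folded:
+//
+//   int  *getA(int  **pp) { return *pp; }
+//   char *getB(char **pp) { return *pp; }
+//
+// Depending on linkage, getB is then deleted, turned into an alias of getA,
+// or rewritten as a thunk that tail-calls a bitcast of getA.
+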
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
+
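+// [Editor's illustration -- not part of the original source.] The hash folds
+// together {#basic blocks, calling conv, hasGC, isVarArg, return type ID,
+// param type IDs}. For a hypothetical 'static int id(int x) { return x; }'
+// that tuple is roughly {1, C, false, false, IntegerTyID, [IntegerTyID]}, so
+// any single-block i32(i32) function hashes into the same bucket and is only
+// distinguished by the full FunctionComparator check below.
+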
+namespace {
+
+/// ComparableFunction - A class that pairs a Function with a TargetData so
+/// that the two can be kept together as elements in the DenseSet.
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+ static TargetData * const LookupOnly;
+
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ TargetData *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1);
+
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
+}
+
+namespace {
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD) {}
+
+ /// Test whether the two functions have equivalent behaviour.
+ bool compare();
+
+private:
+ /// Test whether two basic blocks have equivalent behaviour.
+ bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ bool enumerate(const Value *V1, const Value *V2);
+
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
+
+ /// Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ DenseMap<const Value *, const Value *> id_map;
+ DenseSet<const Value *> seen_values;
+};
+
+}
+
+// Any two pointers in the same address space are equivalent, and (given
+// TargetData) intptr_t and pointers are equivalent. Otherwise, standard type
+// equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
+ if (Ty1 == Ty2)
+ return true;
+ if (Ty1->getTypeID() != Ty2->getTypeID()) {
+ if (TD) {
+ LLVMContext &Ctx = Ty1->getContext();
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ }
+ return false;
+ }
+
+ switch (Ty1->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ case Type::IntegerTyID:
+ case Type::VectorTyID:
+ // Ty1 == Ty2 would have returned true earlier.
+ return false;
+
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ return true;
+
+ case Type::PointerTyID: {
+ const PointerType *PTy1 = cast<PointerType>(Ty1);
+ const PointerType *PTy2 = cast<PointerType>(Ty2);
+ return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+ }
+
+ case Type::StructTyID: {
+ const StructType *STy1 = cast<StructType>(Ty1);
+ const StructType *STy2 = cast<StructType>(Ty2);
+ if (STy1->getNumElements() != STy2->getNumElements())
+ return false;
+
+ if (STy1->isPacked() != STy2->isPacked())
+ return false;
+
+ for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
+ if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::FunctionTyID: {
+ const FunctionType *FTy1 = cast<FunctionType>(Ty1);
+ const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ if (FTy1->getNumParams() != FTy2->getNumParams() ||
+ FTy1->isVarArg() != FTy2->isVarArg())
+ return false;
+
+ if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+ return false;
+
+ for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
+ if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+ }
+}
+
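+// [Editor's illustration -- not part of the original source.] Under these
+// rules 'i8*' and 'i32*' (same address space) are equivalent, and when
+// TargetData is available a pointer is also equivalent to the target's
+// intptr_t (e.g. 'i64' on a typical 64-bit target); 'i32' vs 'i64', or
+// pointers in different address spaces, remain distinct.
+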
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to isEquivalentType.
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+ // * because of the above, we don't test for the tail bit on calls later on
+ if (I1->getOpcode() != I2->getOpcode() ||
+ I1->getNumOperands() != I2->getNumOperands() ||
+ !isEquivalentType(I1->getType(), I2->getType()) ||
+ !I1->hasSameSubclassOptionalData(I2))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+ if (!isEquivalentType(I1->getOperand(i)->getType(),
+ I2->getOperand(i)->getType()))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+ return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+ return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+ return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(I1))
+ return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+ return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
+ if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+ if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
+ if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+ if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+
+ return true;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
+ uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
+ Indices1.data(), Indices1.size());
+ uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
+ Indices2.data(), Indices2.size());
+ return Offset1 == Offset2;
+ }
+
+ if (GEP1->getPointerOperand()->getType() !=
+ GEP2->getPointerOperand()->getType())
+ return false;
+
+ if (GEP1->getNumOperands() != GEP2->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+ if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ return false;
+ }
+
+ return true;
+}
+
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
+
+ if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+ if (V1 == V2) return true;
+ const Constant *C2 = dyn_cast<Constant>(V2);
+ if (!C2) return false;
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (C1->isNullValue() && C2->isNullValue() &&
+ isEquivalentType(C1->getType(), C2->getType()))
+ return true;
+ // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+ // then they must have equal bit patterns.
+ return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+ C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
+ }
+
+ if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+ return V1 == V2;
+
+ // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
+ // check whether it's equal to V2. When there is no mapping then we need to
+ // ensure that V2 isn't already equivalent to something else. For this
+ // purpose, we track the V2 values in a set.
+
+ const Value *&map_elem = id_map[V1];
+ if (map_elem)
+ return map_elem == V2;
+ if (!seen_values.insert(V2).second)
+ return false;
+ map_elem = V2;
+ return true;
+}
+
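+// [Editor's illustration -- not part of the original source.] For example,
+// when comparing
+//   f1:  %a = add i32 %x, 1        f2:  %p = add i32 %y, 1
+//        %b = mul i32 %a, %a            %q = mul i32 %p, %p
+// enumerate() records %x<->%y and %a<->%p on first sight; the %b/%q operands
+// then only match because %a already maps to %p and to nothing else.
+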
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
+
+ do {
+ if (!enumerate(F1I, F2I))
+ return false;
+
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
+
+ if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ return false;
+
+ if (!isEquivalentGEP(GEP1, GEP2))
+ return false;
+ } else {
+ if (!isEquivalentOperation(F1I, F2I))
+ return false;
+
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
+
+ if (!enumerate(OpF1, OpF2))
+ return false;
+
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
+ return false;
+ }
+ }
+
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
+
+ return F1I == F1E && F2I == F2E;
+}
+
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
+ // We need to recheck everything, but check the things that weren't included
+ // in the hash first.
+
+ if (F1->getAttributes() != F2->getAttributes())
+ return false;
+
+ if (F1->hasGC() != F2->hasGC())
+ return false;
+
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
+ return false;
+
+ if (F1->hasSection() != F2->hasSection())
+ return false;
+
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
+ return false;
+
+ if (F1->isVarArg() != F2->isVarArg())
+ return false;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (F1->getCallingConv() != F2->getCallingConv())
+ return false;
+
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
+ return false;
+
+ assert(F1->arg_size() == F2->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
+ }
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
+
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
+
+ if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
+ return false;
+
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
+
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool insert(ComparableFunction &NewF);
+
+ /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnSet and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it.
+ FnSetType FnSet;
+
+ /// TargetData for more accurate GEP comparisons. May be NULL.
+ TargetData *TD;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ Deferred.push_back(WeakVH(I));
+ }
+ FnSet.resize(Deferred.size());
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+
+ // Insert only weak functions and merge them. By doing these second we
+ // create thunks to the strong function when possible. When two weak
+ // functions are identical, we create a new strong function with two weak
+ // thunks to it which are identical but not mergeable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnSet.clear();
+
+ return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+
+ // One of these is a special "underlying pointer comparison only" object.
+ if (LHS.getTD() == ComparableFunction::LookupOnly ||
+ RHS.getTD() == ComparableFunction::LookupOnly)
+ return false;
+
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+
+ return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter)) {
+ remove(CS.getInstruction()->getParent()->getParent());
+ TheIter.getUse().set(BitcastNew);
+ }
+ }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ replaceDirectCallers(G, F);
+ }
+
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
+ Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
+
+ SmallVector<Value *, 16> Args;
+ unsigned i = 0;
+ const FunctionType *FFTy = F->getFunctionType();
+ for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+ AI != AE; ++AI) {
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ ++i;
+ }
+
+ CallInst *CI = Builder.CreateCall(F, Args);
+ CI->setTailCall();
+ CI->setCallingConv(F->getCallingConv());
+ if (NewG->getReturnType()->isVoidTy()) {
+ Builder.CreateRetVoid();
+ } else {
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ }
+
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
+}
+
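+// [Editor's illustration -- not part of the original source.] The emitted
+// thunk has roughly this shape (the types shown are hypothetical):
+//
+//   define i8* @G(i32* %p) {
+//     %0 = bitcast i32* %p to i8*
+//     %1 = tail call i32* @F(i8* %0)
+//     %2 = bitcast i32* %1 to i8*
+//     ret i8* %2
+//   }
+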
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ BitcastF, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ if (HasGlobalAliases) {
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
+
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+ writeAlias(F, G);
+ writeAlias(F, H);
+
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ } else {
+ // We can't merge them. Instead, pick one and update all direct callers
+ // to call it and hope that we improve the instruction cache hit rate.
+ replaceDirectCallers(G, F);
+ }
+
+ ++NumDoubleWeak;
+ } else {
+ writeThunkOrAlias(F, G);
+ }
+
+ ++NumFunctionsMerged;
+}
+
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second) {
+ DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ return false;
+ }
+
+ const ComparableFunction &OldF = *Result.first;
+
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
+
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
+
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
+
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ // We need to make sure we remove F, not a function "equal" to F per the
+ // function equality comparator.
+ //
+ // The special "lookup only" ComparableFunction bypasses the expensive
+ // function comparison in favour of a pointer comparison on the underlying
+ // Function*'s.
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+ if (FnSet.erase(CF)) {
+ DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ Deferred.push_back(F);
+ }
+}
+
+// For each instruction that uses the value, remove() the function that
+// contains the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use &U = UI.getUse();
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U.getUser())) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+ CUI != CUE; ++CUI)
+ Worklist.push_back(*CUI);
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 000000000000..d9d1d106111e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,182 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining: typically, the early-exit 'if' test at
+// the entry of a function is inlined into its callers, while the rest of the
+// body is outlined into a separate function.
+//
+//===----------------------------------------------------------------------===//
+
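+// [Editor's illustration -- not part of the original source.] A hedged sketch
+// of the shape this pass looks for, using a hypothetical function:
+//
+//   int process(item *it) {
+//     if (it == 0)          // cheap early-exit test: inlined into callers
+//       return 0;
+//     /* large body */      // outlined into a new function via
+//     return do_work(it);   // ExtractCodeRegion and called from the test
+//   }
+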
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
+namespace {
+ struct PartialInliner : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ static char ID; // Pass identification, replacement for typeid
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module& M);
+
+ private:
+ Function* unswitchFunction(Function* F);
+ };
+}
+
+char PartialInliner::ID = 0;
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false)
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+ // First, verify that this function is an unswitching candidate...
+ BasicBlock* entryBlock = F->begin();
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
+ return 0;
+
+ BasicBlock* returnBlock = 0;
+ BasicBlock* nonReturnBlock = 0;
+ unsigned returnCount = 0;
+ for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+ SI != SE; ++SI)
+ if (isa<ReturnInst>((*SI)->getTerminator())) {
+ returnBlock = *SI;
+ returnCount++;
+ } else
+ nonReturnBlock = *SI;
+
+ if (returnCount != 1)
+ return 0;
+
+ // Clone the function, so that we can hack away on it.
+ ValueToValueMapTy VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
+ duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(duplicateFunction);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
+
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(duplicateFunction);
+
+ // Special hackery is needed with PHI nodes that have inputs from more than
+ // one extracted block. For simplicity, just split the PHIs into a two-level
+ // sequence of PHIs, some of which will go in the extracted region, and some
+ // of which will go outside.
+ BasicBlock* preReturn = newReturnBlock;
+ newReturnBlock = newReturnBlock->splitBasicBlock(
+ newReturnBlock->getFirstNonPHI());
+ BasicBlock::iterator I = preReturn->begin();
+ BasicBlock::iterator Ins = newReturnBlock->begin();
+ while (I != preReturn->end()) {
+ PHINode* OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi) break;
+
+ PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ OldPhi->replaceAllUsesWith(retPhi);
+ Ins = newReturnBlock->getFirstNonPHI();
+
+ retPhi->addIncoming(I, preReturn);
+ retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+ newEntryBlock);
+ OldPhi->removeIncomingValue(newEntryBlock);
+
+ ++I;
+ }
+ newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+
+ // Gather up the blocks that we're going to extract.
+ std::vector<BasicBlock*> toExtract;
+ toExtract.push_back(newNonReturnBlock);
+ for (Function::iterator FI = duplicateFunction->begin(),
+ FE = duplicateFunction->end(); FI != FE; ++FI)
+ if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+ &*FI != newNonReturnBlock)
+ toExtract.push_back(FI);
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.runOnFunction(*duplicateFunction);
+
+ // Extract the body of the if.
+ Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+
+ InlineFunctionInfo IFI;
+
+ // Inline the top-level if test into all callers.
+ std::vector<User*> Users(duplicateFunction->use_begin(),
+ duplicateFunction->use_end());
+ for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+ UI != UE; ++UI)
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
+
+ // Ditch the duplicate, since we're done with it, and rewrite all remaining
+ // users (function pointers, etc.) back to the original function.
+ duplicateFunction->replaceAllUsesWith(F);
+ duplicateFunction->eraseFromParent();
+
+ ++NumPartialInlined;
+
+ return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+ std::vector<Function*> worklist;
+ worklist.reserve(M.size());
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+ if (!FI->use_empty() && !FI->isDeclaration())
+ worklist.push_back(&*FI);
+
+ bool changed = false;
+ while (!worklist.empty()) {
+ Function* currFunc = worklist.back();
+ worklist.pop_back();
+
+ if (currFunc->use_empty()) continue;
+
+ bool recursive = false;
+ for (Function::use_iterator UI = currFunc->use_begin(),
+ UE = currFunc->use_end(); UI != UE; ++UI)
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
+ if (I->getParent()->getParent() == currFunc) {
+ recursive = true;
+ break;
+ }
+ if (recursive) continue;
+
+
+ if (Function* newFunc = unswitchFunction(currFunc)) {
+ worklist.push_back(newFunc);
+ changed = true;
+ }
+
+ }
+
+ return changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 000000000000..b7e63dc4484c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,256 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
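+// [Editor's illustration -- not part of the original source.] Roughly: when a
+// callee is proven nounwind,
+//     invoke void @callee() to label %normal unwind label %lpad
+// is rewritten as
+//     call void @callee()
+//     br label %normal
+// which typically leaves %lpad dead, and an SCC whose members neither unwind
+// nor contain an 'unwind' terminator is marked 'nounwind'.
+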
+#define DEBUG_TYPE "prune-eh"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at linktime with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F == 0) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
+ // Uses unwind!
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+ SCCMightReturn = true;
+ }
+
+ // Invoke instructions don't allow unwinding to continue, so we are
+ // only interested in call instructions.
+ if (CheckUnwind && !SCCMightUnwind)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->doesNotThrow()) {
+ // This call cannot throw.
+ } else if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may
+ // throw because it might.
+ if (!SCCNodes.count(CalleeNode)) {
+ SCCMightUnwind = true;
+ break;
+ }
+ } else {
+ // Indirect call, it might throw.
+ SCCMightUnwind = true;
+ break;
+ }
+ }
+ if (SCCMightUnwind && SCCMightReturn) break;
+ }
+ }
+ }
+
+  // If the SCC can't unwind or can't return, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Attributes NewAttributes = Attribute::None;
+
+ if (!SCCMightUnwind)
+ NewAttributes |= Attribute::NoUnwind;
+ if (!SCCMightReturn)
+ NewAttributes |= Attribute::NoReturn;
+
+ Function *F = (*I)->getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
+ const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
+ if (PAL != NPAL) {
+ MadeChange = true;
+ F->setAttributes(NPAL);
+ }
+ }
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ // Convert any invoke instructions to non-throwing functions in this node
+ // into call instructions with a branch. This makes the exception blocks
+ // dead.
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
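
The bottom-up inference above can be pictured on a toy call graph. As a rough
standalone illustration (a sketch, not code from this import; the Func struct,
the function names, and the singleton SCCs are invented), a function ends up
nounwind only if it contains no throwing operation and every callee processed
before it was already proven nounwind:

  #include <iostream>
  #include <map>
  #include <string>
  #include <vector>

  struct Func {
    bool hasThrowingOp;                // contains a throwing operation itself
    std::vector<std::string> callees;  // direct callees, by name
    bool noUnwind;                     // inferred attribute
  };

  int main() {
    std::map<std::string, Func> cg;
    cg["leaf"]    = Func{false, {}, false};
    cg["wrapper"] = Func{false, {"leaf"}, false};
    cg["thrower"] = Func{true,  {}, false};
    cg["caller"]  = Func{false, {"thrower", "wrapper"}, false};

    // SCCs visited bottom-up; for simplicity every SCC here is one function.
    std::vector<std::vector<std::string>> sccs = {
        {"leaf"}, {"wrapper"}, {"thrower"}, {"caller"}};

    for (const auto &scc : sccs) {
      bool mightUnwind = false;
      for (const auto &name : scc) {
        const Func &f = cg[name];
        if (f.hasThrowingOp)
          mightUnwind = true;
        for (const auto &callee : f.callees)
          if (!cg[callee].noUnwind)    // callee was not proven nounwind
            mightUnwind = true;
      }
      if (!mightUnwind)
        for (const auto &name : scc)
          cg[name].noUnwind = true;    // analogous to adding Attribute::NoUnwind
    }

    for (const auto &kv : cg)
      std::cout << kv.first << ": nounwind=" << kv.second.noUnwind << "\n";
    // caller: 0, leaf: 1, thrower: 0, wrapper: 1
  }
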
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 000000000000..b5f09ecccaf2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,73 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+namespace {
+
+/// @brief Pass to remove unused function declarations.
+class StripDeadPrototypesPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnModule(Module &M);
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
+bool StripDeadPrototypesPass::runOnModule(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+ // Erase dead global var prototypes.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ) {
+ GlobalVariable *GV = I++;
+ // Global must be a prototype and unused.
+    if (GV->isDeclaration() && GV->use_empty()) {
+      GV->eraseFromParent();
+      MadeChange = true;
+    }
+ }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesPass();
+}
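
The erase loops above depend on advancing the iterator past the current
element before erasing it, so the erase cannot invalidate the iterator still
in use. A minimal standalone sketch of the same pattern (the list contents and
the "unused" predicate are invented for illustration):

  #include <iostream>
  #include <list>
  #include <string>

  int main() {
    std::list<std::string> decls = {"printf", "unused_proto", "malloc",
                                    "another_unused_proto"};

    for (auto it = decls.begin(), end = decls.end(); it != end; ) {
      auto cur = it++;                 // grab the element, then advance
      if (cur->find("unused") != std::string::npos)
        decls.erase(cur);              // safe: 'it' already points past 'cur'
    }

    for (const auto &d : decls)
      std::cout << d << "\n";          // printf, malloc
  }
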
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 000000000000..0fbaff1509a7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,413 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false)
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false)
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false)
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
+
+char StripDeadDebugInfo::ID = 0;
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false)
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ User *U = *I;
+ if (U != Usr)
+ return false;
+ }
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant*, 4> Operands;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ if (OnlyUsedBy(C->getOperand(i), C))
+ Operands.insert(cast<Constant>(C->getOperand(i)));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
+ OE = Operands.end(); OI != OE; ++OI)
+ RemoveDeadConstant(*OI);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+ std::vector<StructType*> StructTypes;
+ M.findUsedStructTypes(StructTypes);
+
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isAnonymous() || STy->getName().empty()) continue;
+
+ if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ continue;
+
+ STy->setName("");
+ }
+}
+
+/// Find values that are marked as llvm.used.
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+/// StripSymbolNames - Strip symbol names.
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeNames(M, PreserveDbgInfo);
+
+ return true;
+}
+
+// StripDebugInfo - Strip debug info in the module if it exists.
+// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
+// llvm.dbg.region.end calls, and any globals they point to if now dead.
+static bool StripDebugInfo(Module &M) {
+
+ bool Changed = false;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
+ }
+
+ return Changed;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ } else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
+
+/// getRealLinkageName - If the linkage name starts with the special LLVM
+/// prefix ('\1') that tells the asm printer not to emit the usual symbol
+/// prefix, return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+  // Debugging information is encoded in LLVM IR using metadata. This is
+  // designed in such a way that debug info for a symbol is preserved even if
+  // the symbol itself is optimized away by the optimizer. This pass removes
+  // the debug info for such dead symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove llvm.dbg.lv.fnname named mdnode which may have been used
+ // to hold debug info for dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
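
StripSymbolNames above only clears the names of internal symbols that are not
pinned by llvm.used / llvm.compiler.used. As a rough standalone sketch of that
keep-set idea (the Symbol struct and the sample data are invented; nothing
here uses the LLVM API):

  #include <iostream>
  #include <set>
  #include <string>
  #include <vector>

  struct Symbol {
    std::string name;
    bool internal;   // analogous to hasLocalLinkage()
  };

  int main() {
    std::vector<Symbol> symbols = {
        {"main", false}, {"helper", true}, {"kept_by_llvm_used", true}};
    std::set<std::string> keep = {"kept_by_llvm_used"};  // like llvm.used

    for (auto &s : symbols)
      if (s.internal && keep.count(s.name) == 0)
        s.name.clear();                // analogous to V->setName("")

    for (const auto &s : symbols)
      std::cout << (s.name.empty() ? "<stripped>" : s.name) << "\n";
    // main, <stripped>, kept_by_llvm_used
  }
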
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 000000000000..8257d6b89ddc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,367 @@
+//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_INSTCOMBINE_H
+#define INSTCOMBINE_INSTCOMBINE_H
+
+#include "InstCombineWorklist.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/TargetFolder.h"
+
+namespace llvm {
+ class CallSite;
+ class TargetData;
+ class DbgDeclareInst;
+ class MemIntrinsic;
+ class MemSetInst;
+
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
+/// getComplexity: Assign a complexity or rank value to LLVM Values...
+/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+
+/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+/// just like the normal insertion helper, but also adds any new instructions
+/// to the instcombine worklist.
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+ : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+};
+
+/// InstCombiner - The -instcombine pass.
+class LLVM_LIBRARY_VISIBILITY InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ TargetData *TD;
+ bool MadeIRChange;
+public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
+
+public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ TargetData *getTargetData() const { return TD; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = 0);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitMalloc(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+private:
+ bool ShouldChangeType(const Type *From, const Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V) const;
+ const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
+
+ Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ Value *EmitGEPOffset(User *GEP);
+
+public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+ // debug loc.
+ //
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+ New->setDebugLoc(Old.getDebugLoc());
+ return InsertNewInstBefore(New, Old);
+ }
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+  // found to be dead, replaceable with another preexisting expression.  Here
+ // we add all uses of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ DEBUG(errs() << "IC: Replacing " << I << "\n"
+ " with " << *V << '\n');
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return 0; // Don't do anything with FI
+ }
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+private:
+
+ /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+ /// operators which are associative or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+ /// which some other binary operation distributes over either by factorizing
+ /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+ /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+ /// a win). Returns the simplified value, or null if it didn't simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ Instruction *FoldOpIntoPhi(Instruction &I);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+};
+
+
+
+} // end namespace llvm.
+
+#endif
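
The header above is built around a worklist discipline: visit methods rewrite
one instruction, and anything whose operands changed is pushed back to be
revisited until nothing simplifies further. A minimal standalone sketch of
that driver pattern, with integers standing in for instructions and the toy
rewrite rule "n -> n/2 while n is even" (invented purely for illustration):

  #include <iostream>
  #include <vector>

  int main() {
    std::vector<int> worklist = {12, 7, 40};
    std::vector<int> done;

    while (!worklist.empty()) {
      int n = worklist.back();
      worklist.pop_back();
      if (n % 2 == 0)
        worklist.push_back(n / 2);   // rewrote it; revisit the result
      else
        done.push_back(n);           // no rule applies; it is in normal form
    }

    for (int n : done)
      std::cout << n << " ";         // 5 7 3  (40->20->10->5, 12->6->3)
    std::cout << "\n";
  }
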
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 000000000000..c36a9552e7a3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,697 @@
+//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isIntegerTy())
+ return 0;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ return 0;
+}
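
The "multiplier is really 1 << CST" remark above is just the shift/multiply
identity. A standalone exhaustive check over 8-bit values (nothing
LLVM-specific, only the arithmetic fact being relied on):

  #include <cassert>
  #include <iostream>

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned c = 0; c < 8; ++c)
        assert(((x << c) & 0xFF) == ((x * (1u << c)) & 0xFF));
    std::cout << "x << c == x * (1 << c)  (mod 256) for all 8-bit x, c < 8\n";
  }
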
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
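
The heuristic above can be checked exhaustively at a small bit width. In this
standalone sketch (the helper numSignBits is an invented stand-in for
ComputeNumSignBits), any two 8-bit operands that each have more than one sign
bit sum to a value that still fits in the signed 8-bit range:

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Redundant sign bits of an 8-bit value, counting the sign bit itself, so
  // the result is in [1, 8].
  static int numSignBits(int8_t v) {
    unsigned u = (uint8_t)v;
    unsigned sign = (u >> 7) & 1;
    int n = 1;
    for (int bit = 6; bit >= 0; --bit) {
      if (((u >> bit) & 1) != sign) break;
      ++n;
    }
    return n;
  }

  int main() {
    for (int a = -128; a <= 127; ++a)
      for (int b = -128; b <= 127; ++b)
        if (numSignBits((int8_t)a) > 1 && numSignBits((int8_t)b) > 1) {
          int sum = a + b;                     // computed in a wider type
          assert(sum >= -128 && sum <= 127);   // still fits in int8_t
        }
    std::cout << "no signed 8-bit overflow when both operands have >1 sign bit\n";
  }
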
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt &Val = CI->getValue();
+ if (Val.isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+
+ Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt &RHSVal = CI->getValue();
+ unsigned ExtendAmt = 0;
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if (XorRHS->getValue() == -RHSVal) {
+ if (RHSVal.isPowerOf2())
+ ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
+ else if (XorRHS->getValue().isPowerOf2())
+ ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
+ }
+
+ if (ExtendAmt) {
+ APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
+ if (!MaskedValueIsZero(XorLHS, Mask))
+ ExtendAmt = 0;
+ }
+
+ if (ExtendAmt) {
+ Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
+ Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmt);
+ }
+ }
+ }
+
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
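
The "A+B --> A|B iff A and B have no bits set in common" fold above rests on
the fact that a disjoint addition generates no carries. A standalone
exhaustive check of that fact over all pairs of 8-bit values (nothing
LLVM-specific is used):

  #include <cassert>
  #include <iostream>

  int main() {
    for (unsigned a = 0; a < 256; ++a)
      for (unsigned b = 0; b < 256; ++b)
        if ((a & b) == 0)                 // no bits in common
          assert((a + b) == (a | b));     // add and or agree, no carries
    std::cout << "a + b == a | b whenever a & b == 0 (8-bit)\n";
  }
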
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+    // X + -0.0 --> X  (-0.0 is the additive identity for fadd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ TargetData &TD = *getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // If the GEP is inbounds, we know that none of the addressing operations will
+ // overflow in an unsigned sense.
+ bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ if (Size)
+ Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy)
+ Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Size != 1) {
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".idx", isInBounds /*NUW*/);
+ }
+
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+ }
+ return Result;
+}
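
EmitGEPOffset materialises the usual address arithmetic: a struct-field index
contributes the field's byte offset, and an array index contributes index
times the element size. A standalone sketch with an invented struct S, checked
against real pointer arithmetic:

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>

  struct S {
    int32_t header;
    double  values[8];
  };

  int main() {
    S s = {};
    std::size_t idx = 3;

    // GEP-style computation: field offset plus scaled array index.
    std::size_t offset = offsetof(S, values) + idx * sizeof(double);

    // The same offset derived from actual pointer arithmetic.
    const char *base = reinterpret_cast<const char *>(&s);
    const char *elem = reinterpret_cast<const char *>(&s.values[idx]);
    assert(offset == static_cast<std::size_t>(elem - base));

    std::cout << "byte offset of s.values[3] = " << offset << "\n";
  }
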
+
+
+
+
+/// Optimize differences of pointers into the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ const Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GetElementPtrInst *GEP = 0;
+ ConstantExpr *CstGEP = 0;
+
+ // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ // For now we require one side to be the base pointer "A" or a constant
+ // expression derived from it.
+ if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP = LHSGEP;
+ Swapped = false;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
+ // (gep X, ...) - (ce_gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ LHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP = RHSGEP;
+ Swapped = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
+ // (ce_gep X, ...) - (gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ RHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ if (GEP == 0)
+ return 0;
+
+ // Emit the offset of the GEP and an intptr_t.
+ Value *Result = EmitGEPOffset(GEP);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (CstGEP) {
+ Value *CstOffset = EmitGEPOffset(CstGEP);
+ Result = Builder->CreateSub(Result, CstOffset);
+ }
+
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
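
The source-level pattern this targets is ordinary same-array pointer
subtraction, which the fold reduces to a constant element count. A tiny
standalone example of the input it is aiming at:

  #include <cstddef>
  #include <iostream>

  int main() {
    int A[16] = {};
    std::ptrdiff_t d = &A[10] - &A[0];   // the fold turns this into the constant 10
    std::cout << d << "\n";              // prints 10
  }
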
+
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C - zext(bool) -> bool ? C - 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+
+ // C-(X+C2) --> (C-C2)-X
+ ConstantInt *C2;
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+ }
+
+
+ { Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = 0, *Y = 0, *Z = 0;
+ Constant *C = 0;
+ ConstantInt *CI = 0;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+ match(Op0, m_Zero()))
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - X*C --> X * (1-C)
+ if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+
+ // X - X<<C --> X * (1-(1<<C))
+ if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+ return BinaryOperator::CreateMul(Op0, C);
+ }
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+ match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+ }
+
+ // Optimize pointer differences in the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
+ return 0;
+}
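+
+// Worked example (illustrative) of the "X-(X+Y) == -Y" fold above: given
+//   %t = add i32 %x, %y
+//   %s = sub i32 %x, %t
+// %s is replaced by a negation of %y, i.e. "sub i32 0, %y", independent of
+// the value of %x.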
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 000000000000..64ea36fb1e9d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,2306 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return 0;
+}
+
+
+/// getICmpCode - Encode an icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// bit 0: A > B
+/// bit 1: A == B
+/// bit 2: A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+ switch (ICI->getPredicate()) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ return 0;
+ }
+}
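+
+// Worked example (illustrative): (A u< B) has code 4 (100) and (A u> B) has
+// code 1 (001); or'ing the codes gives 5 (101), which getICmpValue below
+// turns back into (A != B), matching the table above.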
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE or FCMP_TRUE.
+ llvm_unreachable("Unexpected FCmp predicate!");
+ return 0;
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (Code) {
+ default: assert(0 && "Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return Builder->CreateICmp(Pred, LHS, RHS);
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into an FCmp instruction. isordered is passed in
+/// to determine which kind of predicate to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (code) {
+ default: assert(0 && "Illegal FCmp code!");
+ case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+ case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+ case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+ case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+ case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+ case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+ case 7:
+ if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+ Pred = FCmpInst::FCMP_ORD; break;
+ }
+ return Builder->CreateFCmp(Pred, LHS, RHS);
+}
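+
+// Worked example (illustrative): code 3 with isordered=true selects FCMP_OGE,
+// so the builder emits "fcmp oge %x, %y" for operands %x and %y; code 7 with
+// isordered=false is the always-true case and folds to the constant i1 true.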
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
+ }
+
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding a one to a single bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can affect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+ AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+
+ if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
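+
+// Worked example (illustrative) of the Xor case above: (X ^ 4) & 12, with the
+// xor having a single use, becomes (X & 12) ^ (4 & 12), i.e. (X & 12) ^ 4.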
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat V, Lo, and Hi as signed or not. New instructions are
+/// emitted through the Builder.
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return ConstantInt::getFalse(V->getContext());
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpULT(Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return ConstantInt::getTrue(V->getContext());
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpUGT(Add, LowerBound);
+}
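+
+// Worked example (illustrative): with isSigned=false and Inside=true, a test
+// for 5 <= V < 10 is emitted as
+//   %off = add i32 %V, -5
+//   %cmp = icmp ult i32 %off, 5
+// i.e. the single unsigned comparison (V-5) <u (10-5) described above (the
+// add is named with a ".off" suffix).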
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
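+
+// Worked example (illustrative): for a 32-bit Val of 0x000000F0 the run of
+// ones occupies bits 4..7, and the arithmetic above yields MB == 5 and
+// ME == 8, i.e. the 1-based positions of the lowest and highest set bits.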
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return 0;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
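+
+// Worked example (illustrative): for ((A & 7) + B) & 7 the mask 7 is of the
+// 0+1+ form and N & Mask == Mask, so the helper returns (A + B) and the
+// caller re-applies the outer "& 7", matching the first xform listed above.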
+
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares that the comparison is true only
+/// if (A & B) == A, i.e. all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares that the comparison is true only
+/// if (A & B) == 0, i.e. all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares that (A & B) == C and C might or might not
+/// contain any number of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The part "Not" means that in the above descriptions "==" should be replaced
+/// by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+ result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ }
+ else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C)
+ {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ }
+ else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+ if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // or L11 & L12 == L21 & L22. The same goes for RHS.
+ // Now we must find those components L** and R** that are equal, so
+ // that we can extract the parameters A, B, C, D, and E for the canonical
+ // form above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ }
+ else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R2; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R1; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R1; ok = true;
+ }
+ else
+ return 0;
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ }
+ else if (L12 == A) {
+ B = L11; C = L2;
+ }
+ else if (L21 == A) {
+ B = L22; C = L1;
+ }
+ else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ if (mask == 0) return 0;
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ // we can't use C as zero, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // with B and D, having a single bit set
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ else if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ else if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ else if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+ // we can't simply use C and E, because we might actually handle
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D, having a single bit set
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHS->getPredicate() != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHS->getPredicate() != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+ // if there is a conflict we should actually return a false for the
+ // whole construct
+ if (!MCst->isZero())
+ return 0;
+ Value *newOr1 = Builder->CreateOr(B, D);
+ Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
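+
+// Worked example (illustrative): (icmp eq (A & 1), 0) & (icmp eq (A & 2), 0)
+// classifies as Mask_AllZeroes on both sides, so the fold above produces
+//   %m = and i32 %A, 3
+//   %c = icmp eq i32 %m, 0
+// testing both low bits of %A with a single compare.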
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
+
+ // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // where CMAX is the all ones value for the truncated type,
+ // iff the lower bits of C2 and CA are zero.
+ if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+ LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+ // (trunc x) == C1 & (and x, CA) == C2
+ if (match(Val2, m_Trunc(m_Value(V))) &&
+ match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = RHSCst;
+ BigCst = LHSCst;
+ }
+ // (and x, CA) == C2 & (trunc x) == C1
+ else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ SmallCst = LHSCst;
+ BigCst = RHSCst;
+ }
+
+ if (SmallCst && BigCst) {
+ unsigned BigBitSize = BigCst->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+ // Check that the low bits are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+ APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+ Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+ return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // Make a constant range that's the intersection of the two icmp ranges.
+ // If the intersection is empty, we know that the result is false.
+ ConstantRange LHSRange =
+ ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange RHSRange =
+ ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+ if (LHSRange.intersectWith(RHSRange).isEmptySet())
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
+ // (from the icmp folding check above) that the two constants
+ // are not equal and that the larger constant is on the RHS.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return LHS;
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return LHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return LHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return RHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
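+
+// Worked example (illustrative): (icmp ult %x, 8) & (icmp ult %y, 8) hits the
+// shared power-of-two constant case above and becomes
+//   %o = or i32 %x, %y
+//   %c = icmp ult i32 %o, 8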
+
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getFalse(LHS->getContext());
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return LHS;
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return RHS;
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+
+ return 0;
+}
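+
+// Worked example (illustrative): (fcmp ord %x, 0.0) & (fcmp ord %y, 0.0) takes
+// the constant-operand path above and folds to "fcmp ord %x, %y", a single
+// "neither operand is a NaN" test.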
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or: {
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ APInt NotAndRHS(~AndRHSMask);
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ }
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ if (MaskedValueIsZero(Op0LHS, Mask)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = 0; ConstantInt *YC = 0;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we would enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never enter the loop.
+ if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ return Changed ? &I : 0;
+}
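+
+// Worked example (illustrative) of the De Morgan fold in visitAnd: with %a and
+// %b non-constant values and both xors having a single use,
+//   %na = xor i32 %a, -1
+//   %nb = xor i32 %b, -1
+//   %r = and i32 %na, %nb
+// is rewritten so that %r becomes the complement of "or i32 %a, %b", i.e. a
+// single or followed by one not.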
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. This expression is
+/// accepted, and the high byte of ByteValues is set to %X to indicate a
+/// correct match.
+///
+/// This function returns true if the match was unsuccessful and false if it
+/// succeeded.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values. ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned with
+ // its ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (InputByteNo < ByteValues.size()/2) {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ } else {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ }
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Check to see if all of the bytes come from the same value.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ return CallInst::Create(F, V);
+}
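
To see the idiom MatchBSwap hunts for, here is a minimal standalone C++ sketch (an editorial illustration, not part of the patch) that checks the shift/or/mask form against a byte-by-byte reference swap:

    #include <cassert>
    #include <cstdint>

    // Reference byte swap, written byte by byte.
    static uint32_t bswap_ref(uint32_t x) {
      return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
             ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
    }

    // The shift/or/mask idiom the matcher recognizes: every byte of the
    // result is taken from exactly one byte of the same input value.
    static uint32_t bswap_idiom(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
             ((x << 8) & 0x00ff0000u) | (x << 24);
    }

    int main() {
      uint32_t tests[] = {0u, 1u, 0x12345678u, 0xdeadbeefu, 0xffffffffu};
      for (uint32_t x : tests)
        assert(bswap_ref(x) == bswap_idiom(x));
    }
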
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
+/// we can simplify this expression to "cond ? C : D or B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SExt(m_Value(Cond))) ||
+ !Cond->getType()->isIntegerTy(1))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
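
A small standalone C++ check of the identity behind MatchSelectFromAndOr (illustrative only; the uint32_t all-ones/all-zeros masks stand in for the sign-extended i1 condition):

    #include <cassert>
    #include <cstdint>

    // (A & C) | (B & D) with A == (cond ? -1 : 0) and D == ~A collapses to
    // a select: cond ? C : B.
    static uint32_t and_or_form(bool cond, uint32_t C, uint32_t B) {
      uint32_t A = cond ? ~0u : 0u;   // stands in for sext(i1 cond)
      uint32_t D = ~A;                // the complementary mask
      return (A & C) | (B & D);
    }

    int main() {
      uint32_t tests[] = {0u, 7u, 13u, 0x80000000u, 0xffffffffu};
      for (int cond = 0; cond <= 1; ++cond)
        for (uint32_t C : tests)
          for (uint32_t B : tests)
            assert(and_or_form(cond != 0, C, B) == (cond ? C : B));
    }
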
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ return V;
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
+ if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+ Value *NewAnd = Builder->CreateAnd(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+ }
+ }
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+ // icmp folding check above), that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return RHS;
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return LHS;
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return RHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return RHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return LHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return LHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
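
One of the folds above, (X == 13 | X == 14) -> (X-13) u< 2, rests on unsigned wraparound turning two adjacent points into a half-open range test. A rough standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Values below 13 wrap around to a huge unsigned number after the
      // subtraction, so only 13 and 14 land inside the [0,2) range.
      for (uint64_t i = 0; i <= 0x1ffff; ++i) {
        uint32_t X = (uint32_t)i;
        bool orig   = (X == 13) || (X == 14);
        bool folded = (uint32_t)(X - 13u) < 2u;
        assert(orig == folded);
      }
    }
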
+
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getTrue(LHS->getContext());
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return LHS;
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+ return 0;
+}
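
The UNO fold above boils down to "either operand is NaN". A standalone C++ sanity check of that equivalence, using std::isnan as a stand-in for fcmp uno:

    #include <cassert>
    #include <cmath>
    #include <limits>

    // "fcmp uno a, b" is true iff either operand is NaN.
    static bool uno(double a, double b) {
      return std::isnan(a) || std::isnan(b);
    }

    int main() {
      double nan = std::numeric_limits<double>::quiet_NaN();
      double tests[] = {0.0, 1.5, -3.0, nan};
      for (double x : tests)
        for (double y : tests)
          // (x uno 0.0) | (y uno 0.0)  ==  (x uno y)
          assert((uno(x, 0.0) || uno(y, 0.0)) == uno(x, y));
    }
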
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
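
A standalone check of the masked-or identity this helper implements, assuming the two constants are complements (C1 ^ C2 == -1), e.g. the (&1, &-2) pair handled in visitOr below:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((A | B) & C1) | (B & C2)  -->  (A & C1) | B   when C2 == ~C1.
      const uint32_t C1 = 1u, C2 = ~1u;
      uint32_t tests[] = {0u, 1u, 2u, 0x5au, 0x80000001u, 0xffffffffu};
      for (uint32_t A : tests)
        for (uint32_t B : tests)
          assert((((A | B) & C1) | (B & C2)) == ((A & C1) | B));
    }
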
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+  // (A >> B) | (C << D) and (A << B) | (C >> D)  -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
+ (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ ConstantInt::get(A->getContext(),
+ C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
+ (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = 0, *C4 = 0;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
+ // Don't do this for vector select idioms, the code generator doesn't handle
+ // them well yet.
+ if (!I.getType()->isVectorTy()) {
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+ }
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ // Canonicalize xor to the RHS.
+ if (match(Op0, m_Xor(m_Value(), m_Value())))
+ std::swap(Op0, Op1);
+
+ // A | ( A ^ B) -> A | B
+ // A | (~A ^ B) -> A | ~B
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (Op0 == A || Op0 == B)
+ return BinaryOperator::CreateOr(A, B);
+
+ if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(B, B->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
+ Value *Not = Builder->CreateNot(A, A->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+
+ // A | ~(A | B) -> A | ~B
+ // A | ~(A ^ B) -> A | ~B
+ if (match(Op1, m_Not(m_Value(A))))
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
+ if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
+ Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
+ B->getOpcode() == Instruction::Xor)) {
+ Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
+ B->getOperand(0);
+ Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ CastInst *Op1C = dyn_cast<CastInst>(Op1);
+ if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+ }
+
+ // or(sext(A), B) -> A ? -1 : B where A is an i1
+ // or(A, sext(B)) -> B ? -1 : A where B is an i1
+ if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+ return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
+ }
+
+ return Changed ? &I : 0;
+}
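
Two of the bit identities visitOr leans on, De Morgan and building xor from masked halves, can be verified exhaustively on bytes; a short illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Exhaustive 8-bit check of:
      //   (~A | ~B) == ~(A & B)              De Morgan
      //   (A & ~B) | (~A & B) == A ^ B       xor from masked halves
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = (uint8_t)a, B = (uint8_t)b;
          assert((uint8_t)(~A | ~B) == (uint8_t)~(A & B));
          assert((uint8_t)((A & ~B) | (~A & B)) == (uint8_t)(A ^ B));
        }
    }
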
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+        // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+        // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+
+ } else if (Op0I->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ }
+ }
+ }
+
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS->isOne() && Op0->hasOneUse())
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+          // ~(X+c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return ReplaceInstUsesWith(I,
+ getICmpValue(isSigned, Code, Op0, Op1, Builder));
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
+ // Only do this if the casts both really cause code to be generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
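
As a sanity check on two of the xor folds above, (A&B)^(A|B) -> A^B and ~(X+c) -> (-c-1)-X, here is a short standalone C++ sketch; all arithmetic is modulo 2^32:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t tests[] = {0u, 1u, 13u, 0x12345678u, 0x80000000u, 0xffffffffu};
      for (uint32_t A : tests)
        for (uint32_t B : tests) {
          // (A & B) ^ (A | B) == A ^ B
          assert(((A & B) ^ (A | B)) == (A ^ B));
          // ~(X + c) == (-c - 1) - X, everything reduced modulo 2^32.
          uint32_t X = A, c = B;
          assert(~(X + c) == (0u - c - 1u) - X);
        }
    }
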
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 000000000000..537f2b318aa9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1317 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// though a va_arg area.
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
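
The promotion mirrors C's default argument promotions: anything narrower than int travels through "..." as an int, which is what the i32 widening above reflects. An illustrative (editorial) example:

    #include <cassert>
    #include <cstdarg>

    // A char or short passed through "..." arrives as an int and must be
    // read back at the promoted type.
    static int first_vararg_as_int(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int v = va_arg(ap, int);
      va_end(ap);
      return v;
    }

    int main() {
      char c = 42;
      short s = -7;
      assert(first_vararg_as_int(1, c) == 42);
      assert(first_vararg_as_int(1, s) == -7);
    }
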
+
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
+ if (MemOpLength == 0) return 0;
+
+  // Source and destination pointer types are always "i8*" for the intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
+
+ const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+  // from double* to i8*. We'd much rather use a double load+store than an
+  // i64 load+store here, because this improves the odds that the source or
+  // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ if (StrippedDest != MI->getArgOperand(0)) {
+ const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+ }
+ }
+ }
+
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+ L->setAlignment(SrcAlign);
+ StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+ S->setAlignment(DstAlign);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
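
Semantically, the rewrite for a constant 8-byte copy is "one integer-sized load plus one store". A standalone C++ sketch of that equivalence (memcpy is used for the load and store so the example stays within defined behavior):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      unsigned char a[8], b[8];

      std::memcpy(a, src, 8);            // the original 8-byte memcpy

      uint64_t tmp;                      // the "single i64 load" ...
      std::memcpy(&tmp, src, sizeof tmp);
      std::memcpy(b, &tmp, sizeof tmp);  // ... and the "single i64 store"

      assert(std::memcmp(a, b, 8) == 0);
    }
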
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
+
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
+ MI->isVolatile());
+ S->setAlignment(Alignment);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
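
The n == 8 case of the memset fold relies on the fill byte being replicated across the word by multiplying with 0x0101010101010101; an illustrative check that the single wide store writes the same bytes:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      for (unsigned c = 0; c < 256; ++c) {
        unsigned char a[8], b[8];
        std::memset(a, (int)c, 8);

        // Replicate the fill byte into every byte of a 64-bit word and store
        // it once; memcpy stands in for the aligned 64-bit store.
        uint64_t fill = (uint64_t)c * 0x0101010101010101ULL;
        std::memcpy(b, &fill, sizeof fill);

        assert(std::memcmp(a, b, 8) == 0);
      }
    }
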
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI))
+ return visitFree(CI);
+ if (isMalloc(&CI))
+ return visitMalloc(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // We need target data for just about everything so depend on it.
+ if (!TD) break;
+
+ const Type *ReturnTy = CI.getType();
+ uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
+
+ // Get to the real allocated thing and offset as fast as possible.
+ Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
+
+ uint64_t Offset = 0;
+ uint64_t Size = -1ULL;
+
+ // Try to look through constant GEPs.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
+ if (!GEP->hasAllConstantIndices()) break;
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+ Ops.data(), Ops.size());
+
+ Op1 = GEP->getPointerOperand()->stripPointerCasts();
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
+ if (!GV->hasDefinitiveInitializer()) break;
+ }
+
+ // If we've stripped down to a single global variable that we
+ // can know the size of then just return that.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ Size = TD->getTypeAllocSize(C->getType());
+ } else {
+ // Can't determine size of the GV.
+ Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ // Get alloca size.
+ if (AI->getAllocatedType()->isSized()) {
+ Size = TD->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C) break;
+ Size *= C->getZExtValue();
+ }
+ }
+ } else if (CallInst *MI = extractMallocCall(Op1)) {
+ // Get allocation size.
+ const Type* MallocType = getMallocAllocatedType(MI);
+ if (MallocType && MallocType->isSized())
+ if (Value *NElems = getMallocArraySize(MI, TD, true))
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
+ }
+
+ // Do not return "I don't know" here. Later optimization passes could
+ // make it possible to evaluate objectsize to a constant.
+ if (Size == -1ULL)
+ break;
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
+ }
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap) {
+ unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
+ TI->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(Operand->getType(), C);
+ Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
+ return new TruncInst(V, TI->getType());
+ }
+ }
+
+ break;
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt Mask = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getTrue(II->getContext())
+ };
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Value *Add = Builder->CreateNUWAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH uadd into sadd
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // If multiplying the maximum values does not overflow then we can turn
+ // this into a plain NUW mul.
+ bool Overflow;
+ LHSMax.umul_ov(RHSMax, Overflow);
+ if (!Overflow) {
+ Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ Builder->getFalse()
+ };
+ Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
+ return InsertValueInst::Create(Struct, Mul, 0);
+ }
+ } // FALL THROUGH
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+
+ case Intrinsic::x86_sse41_pmovsxbw:
+ case Intrinsic::x86_sse41_pmovsxwd:
+ case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxwd:
+ case Intrinsic::x86_sse41_pmovzxdq: {
+    // pmov{s|z}x ignores the upper half of its input vector.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ unsigned LowHalfElts = VWidth / 2;
+ APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
+ APInt UndefElts(VWidth, 0);
+ if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ InputDemandedElts,
+ UndefElts)) {
+ II->setArgOperand(0, TmpV);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ Idx&15, false), "tmp");
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ i, false), "tmp");
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
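+// For illustration, with a hypothetical variadic declaration @my_printf:
+//   %q = bitcast i32* %p to i8*
+//   call void (i8*, ...)* @my_printf(i8* %fmt, i8* %q)
+// Here the bitcast is lossless and the argument is not ByVal, so the cast can
+// be dropped and i32* %p passed through the varargs area directly.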
+
+namespace {
+class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+ InstCombiner *IC;
+protected:
+ void replaceCall(Value *With) {
+ NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+public:
+ InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
+ Instruction *NewInstruction;
+};
+} // end anonymous namespace
+
+// Try to fold some different types of calls here.
+// Currently we're only working with the checking functions, memcpy_chk,
+// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
+// strcat_chk and strncat_chk.
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+ if (CI->getCalledFunction() == 0) return 0;
+
+ InstCombineFortifiedLibCalls Simplifier(this);
+ Simplifier.fold(CI, TD);
+ return Simplifier.NewInstruction;
+}
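+// For illustration: a fortified call such as memcpy_chk(dst, src, len, -1)
+// can be folded to a plain memcpy(dst, src, len), since an all-ones object
+// size means the check can never fail; the same holds when the object size is
+// a constant known to be at least len.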
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
+ Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return 0;
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ // Only do this for calls to a function with a body. A prototype may
+ // not actually end up matching the implementation's calling conv for a
+ // variety of reasons (e.g. it may be written in assembly).
+ !CalleeF->isDeclaration()) {
+ Instruction *OldCall = CS.getInstruction();
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+      // If OldCall does not return void then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!OldCall->getType()->isVoidTy())
+ ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall))
+ return EraseInstFromFunction(*OldCall);
+
+      // We cannot remove an invoke because that would change the CFG; just
+      // change the callee to a null pointer instead.
+ cast<InvokeInst>(OldCall)->setCalledFunction(
+ Constant::getNullValue(CalleeF->getType()));
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ // If CS does not return void then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ ReplaceInstUsesWith(*CS.getInstruction(),
+ UndefValue::get(CS.getInstruction()->getType()));
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Don't break the CFG, insert a dummy cond branch.
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(Callee->getContext()), II);
+ }
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+ if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+ if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+ return transformCallThroughTrampoline(CS);
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+  // Try to optimize the call if possible; we require TargetData for most of
+  // this. None of these calls are seen as possibly dead, so go ahead and
+  // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI, TD);
+    // If we changed something, return the result. Otherwise, fall through
+    // to the remaining checks.
+ if (I) return EraseInstFromFunction(*I);
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee == 0)
+ return false;
+ Instruction *Caller = CS.getInstruction();
+ const AttrListPtr &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+  // be a direct call with arguments cast to the appropriate types.
+ //
+ const FunctionType *FT = Callee->getFunctionType();
+ const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((OldRetTy->isPointerTy() || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (NewRetTy->isPointerTy() || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ const Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ if (Attrs & Attribute::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+ const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ return false;
+
+ const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ if (TD->getTypeAllocSize(CurElTy) !=
+ TD->getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ (TD && ((ParamTy->isPointerTy() ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (ActTy->isPointerTy() ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (Callee->isDeclaration()) {
+ // Do not delete arguments unless we have a function body.
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
+ return false;
+
+ // If the callee is just a declaration, don't change the varargsness of the
+ // call. We don't want to introduce a varargs call where one doesn't
+ // already exist.
+ const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+ return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & Attribute::VarArgsIncompatible)
+ return false;
+ }
+
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary.
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args);
+ NC->takeName(II);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ CallInst *CI = cast<CallInst>(Caller);
+ NC = Builder->CreateCall(Callee, Args);
+ NC->takeName(CI);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(NC, false, OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+ NC->setDebugLoc(Caller->getDebugLoc());
+
+      // If this is an invoke instruction, we should insert it after the first
+      // non-phi instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+ if (!Caller->use_empty())
+ ReplaceInstUsesWith(*Caller, NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
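+// For illustration (hypothetical @f, assuming 32-bit pointers): a call
+// through a constant-expression cast such as
+//   %r = call i32 bitcast (i32 (i32)* @f to i32 (i8*)*)(i8* %p)
+// becomes a direct call with the argument cast instead:
+//   %a = ptrtoint i8* %p to i32
+//   %r = call i32 @f(i32 %a)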
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+ Value *Callee = CS.getCalledValue();
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ IntrinsicInst *Tramp =
+ cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttrListPtr &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ Type *NestTy = 0;
+ Attributes NestAttr = Attribute::None;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attributes Attr = Attrs.getRetAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getArgOperand(2);
+ if (NestVal->getType() != NestTy)
+ NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ NewAttrs.push_back
+ (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attributes Attr = Attrs.getFnAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+ NewAttrs.end());
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+
+ return NewCaller;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
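+// For illustration (hypothetical @f and chain value %c): if @f is declared as
+//   i32 @f(i8* nest %chain, i32 %x)
+// and the trampoline was initialized over @f with %c, a call made through the
+// trampoline such as
+//   call i32 %tramp(i32 7)
+// is rewritten into the direct call
+//   call i32 @f(i8* nest %c, i32 7)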
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 000000000000..82c734e0b829
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1771 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ uint64_t &Offset) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Val->getType(), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ // Cannot look past anything that might overflow.
+ OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+ if (OBI && !OBI->hasNoUnsignedWrap()) {
+ Scale = 1;
+ Offset = 0;
+ return Val;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = UINT64_C(1) << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
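+// For illustration (assuming the arithmetic carries the nuw flag):
+//   %v1 = mul nuw i32 %x, 4        ; X = %x, Scale = 4, Offset = 0
+//   %s  = shl nuw i32 %x, 3
+//   %v2 = add nuw i32 %s, 16       ; X = %x, Scale = 8, Offset = 16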
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ // This requires TargetData to get the alloca alignment and size information.
+ if (!TD) return 0;
+
+ const PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
+ // Get the type really allocated and the type casted to.
+ const Type *AllocElTy = AI.getAllocatedType();
+ const Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds.
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ uint64_t ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If we can now satisfy the modulus, by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ }
+
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ ReplaceInstUsesWith(AI, NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
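+// For illustration (assuming i32 has 4-byte size and ABI alignment, and the
+// bitcast is the alloca's only use):
+//   %buf = alloca [8 x i8]
+//   %p   = bitcast [8 x i8]* %buf to i32*
+// can be promoted to
+//   %buf = alloca i32, i32 2
+// after which the bitcast folds away.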
+
+
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
+/// insert the code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, TD);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ break;
+ }
+
+ Res->takeName(I);
+ return InsertNewInstWith(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ const Type *DstTy, ///< The target type for the second cast instruction
+ TargetData *TD ///< The target data for pointer size
+) {
+
+ const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ const Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy,
+ TD ? TD->getIntPtrType(CI->getContext()) : 0);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr &&
+ (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (Res == Instruction::PtrToInt &&
+ (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+    // We don't do this if it would create a PHI node with an illegal type
+    // when the current type is legal.
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return 0;
+}
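+// For illustration of the cast-pair elimination above:
+//   %b = bitcast i32* %p to i8*
+//   %c = bitcast i8* %b to i16*
+// collapses into a single bitcast of %p to i16*.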
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value. This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V. We should return true if trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ const Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
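+// For illustration of the lshr case above: truncating
+//   %s = lshr i32 %x, 8
+// to i16 is only treated as safe when the top 16 bits of %x are known to be
+// zero; otherwise the narrow shift would lose bits that the wide lshr brings
+// into the low 16 bits of the result.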
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateTruncated(Src, DestTy)) {
+
+    // If this cast is a truncate, evaluating in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One, "tmp");
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
+
+  // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+  // type is native.
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+ ShouldChangeType(Src->getType(), CI.getType()) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, CI.getType()));
+ }
+
+ return 0;
+}
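+// For illustration of the lshr-of-zext case handled above, with %a of type
+// i8:
+//   %z = zext i8 %a to i32
+//   %s = lshr i32 %z, 3
+//   %t = trunc i32 %s to i16
+// becomes
+//   %s2 = lshr i8 %a, 3
+//   %t  = zext i8 %s2 to i16
+// eliminating the round trip through i32.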
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+  // If we are just checking for an icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+ In->getName()+".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, "tmp");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
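+// For illustration of the power-of-two case above: if %x is known to have at
+// most bit 3 set, then
+//   %c = icmp ne i32 %x, 0
+//   %z = zext i1 %c to i32
+// simplifies to
+//   %z = lshr i32 %x, 3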
+
+/// CanEvaluateZExtd - Determine if the specified value can be computed in the
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it, even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::LShr:
+    // We can promote lshr(x, cst) if we can promote x. This requires the
+    // ultimate 'and' to clear out the additional high bits this introduces,
+    // though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+  // If this zero extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+  // If one of the common conversions will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept)))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ Value *X;
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) &&
+ (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return 0;
+}
+
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
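+/// For example (illustrative types):
+///   sext (icmp slt i32 %x, 0) to i32   -->  ashr i32 %x, 31
+///   sext (icmp sgt i32 %x, -1) to i32  -->  xor (ashr i32 %x, 31), -1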
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+ Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
+ // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+ Value *Sh = ConstantInt::get(Op0->getType(),
+ Op0->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // If we know that only one bit of the LHS of the icmp can be set and we
+ // have an equality comparison with zero or a power of 2, we can transform
+ // the icmp and sext into bitwise/integer operations.
+ if (ICI->hasOneUse() &&
+ ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+ unsigned BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) {
+ Value *In = ICI->getOperand(0);
+
+ // If the icmp tests for a known zero bit we can constant fold it.
+ if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+ Value *V = Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getAllOnesValue(CI.getType()) :
+ ConstantInt::getNullValue(CI.getType());
+ return ReplaceInstUsesWith(CI, V);
+ }
+
+ if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+ // sext ((x & 2^n) == 0) -> (x >> n) - 1
+ // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+ unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ // Perform a right shift to place the desired bit in the LSB.
+ if (ShiftAmt)
+ In = Builder->CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // At this point "In" is either 1 or 0. Subtract 1 to turn
+ // {1, 0} -> {0, -1}.
+ In = Builder->CreateAdd(In,
+ ConstantInt::getAllOnesValue(In->getType()),
+ "sext");
+ } else {
+ // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
+ // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+ unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ // Perform a left shift to place the desired bit in the MSB.
+ if (ShiftAmt)
+ In = Builder->CreateShl(In,
+ ConstantInt::get(In->getType(), ShiftAmt));
+
+ // Distribute the bit over the whole bit width.
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+ BitWidth - 1), "sext");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+ }
+ }
+ }
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative.
+ if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+ Op0->getType() == CI.getType()) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits. This is used by code that tries
+/// to promote integer operations to a wider type, which will allow us to
+/// eliminate the extension.
+///
+/// This function works on both vectors and scalars.
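+///
+/// For example (illustrative): a single-use "add i8 (trunc i32 %x to i8), 7"
+/// can be evaluated directly as an i32 add of %x and 7, so this returns true;
+/// an add of two plain i8 arguments returns false, since rebuilding it in i32
+/// would require inserting new casts.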
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it,
+ // even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformSExtICmp(ICI, CI);
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
+ // as many builtins (sqrt, etc).
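+ // For example (illustrative):
+ //   %dx = fpext float %x to double
+ //   %dy = fpext float %y to double
+ //   %s  = fadd double %dx, %dy
+ //   %r  = fptrunc double %s to float
+ // becomes a single "fadd float %x, %y".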
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1 &&
+ Call->hasOneUse()) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
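+ // For example (illustrative): fptoui (uitofp i16 %x to float) to i16 folds
+ // to %x, since float's 24-bit mantissa holds every i16 exactly; the i32
+ // version is left alone because 32 is not less than 24.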
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the uitofp case, because an 'X'
+ // value with the sign bit set would cause an undefined result for the fptosi.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
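+ // For example (illustrative, assuming 32-bit pointers):
+ //   inttoptr i64 %x to i8*  -->  inttoptr (trunc i64 %x to i32) to i8*
+ //   inttoptr i16 %x to i8*  -->  inttoptr (zext i16 %x to i32) to i8*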
+ if (TD) {
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateZExt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If we're casting the result of a getelementptr instruction with no offset,
+ // turn this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ GEP->hasAllConstantIndices()) {
+ // We are guaranteed to get a constant from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ const Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase,
+ NewIndices.begin(), NewIndices.end()) :
+ Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
+ // to be exposed to other transforms.
+ if (TD) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new TruncInst(P, CI.getType());
+ }
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new ZExtInst(P, CI.getType());
+ }
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
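+///
+/// For example (illustrative): widening <2 x i32> %v to <4 x i32> yields
+///   shufflevector <2 x i32> %v, <2 x i32> zeroinitializer,
+///                 <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+/// while shrinking <4 x i32> to <2 x i32> keeps elements 0 and 1 and uses
+/// undef as the second shuffle operand.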
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The element types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements reference the first element of the zero input.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win; try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the elements are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
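+///
+/// For example (illustrative):
+///   bitcast (trunc (bitcast <2 x float> %v to i64) to i32) to float
+/// becomes "extractelement <2 x float> %v, i32 0", and with an intervening
+/// "lshr ..., 32" before the trunc it becomes an extract of element 1.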
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ const Type *DstElTy = DstPTy->getElementType();
+ const Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+ // If we are casting an alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ Constant *ZeroUInt =
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end());
+ }
+ }
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
+
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
+ }
+ }
+
+ if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (SrcTy->isPointerTy())
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 000000000000..c78760b20692
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,2919 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ult(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (!IsSigned)
+ return Result->getValue().ugt(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().slt(In1->getValue());
+
+ return Result->getValue().sgt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
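+///
+/// For example (illustrative, i8 operands): "icmp slt i8 %x, 0" and
+/// "icmp ugt i8 %x, 127" both test only the sign bit with TrueIfSigned set,
+/// while "icmp sgt i8 %x, -1" tests it with TrueIfSigned cleared.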
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->isMaxValue(true);
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
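+///
+/// Worked 4-bit example (illustrative): KnownZero = 0100 and KnownOne = 0001
+/// leave bits 1010 unknown. Min starts as 0001 and Max as 1011; because the
+/// sign bit is unknown, Min becomes 1001 (-7) and Max becomes 0011 (3).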
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ // We need TD information to know the pointer size unless this is inbounds.
+ if (!GEP->isInBounds() && TD == 0) return 0;
+
+ ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+ // There are many forms of this optimization we can handle, for now, just do
+ // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return 0;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (Idx == 0) return 0; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+ if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+
+ if (const StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return 0;
+ EltTy = ATy->getElementType();
+ } else {
+ return 0; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+ // comparison is true for element 'i'. If there are 64 elements or less in
+ // the array, this will fully represent all the comparison results.
+ uint64_t MagicBitvector = 0;
+
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+ Constant *Elt = Init->getOperand(i);
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, TD);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return 0;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+ // If all of our states become overdefined, bail out early. Since the
+ // predicate is expensive, only check it every 8 elements. This is only
+ // really useful for really huge arrays.
+ if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return 0;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+ // True for two elements -> 'i == 47 | i == 72'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+ // False for two elements -> 'i != 47 & i != 72'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+
+ // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ if (Init->getNumOperands() <= 32 ||
+ (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ const Type *Ty;
+ if (Init->getNumOperands() <= 32)
+ Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = Type::getInt64Ty(Init->getContext());
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+
+ return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptr_t in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+ }
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+ true /*Signed*/);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
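+ // E.g. "icmp ult (gep inbounds i32* %p, i64 %i), %p" becomes
+ // "icmp slt %i, 0" (assuming i64 is the pointer-sized integer type).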
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // Otherwise, the base pointers are different and the indices are
+ // different, bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
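+ // E.g. "icmp ult (gep inbounds i32* %P, i64 %a), (gep inbounds i32* %P, i64 %b)"
+ // becomes "icmp slt %a, %b".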
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1 && GEPsInBounds) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (TD &&
+ GEPsInBounds &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X >u 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return 0; // The overflow computation also screws up here
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
+
+ // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation
+ // of the form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CmpRHS). By solving for X we can turn this into a range check
+ // instead of computing a divide.
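+ // E.g. for "X /u 5 == 3", Prod is 15 and the code below forms the unsigned
+ // range check 15 <= X < 20.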
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // If the division is known to be exact, then there is no remainder from the
+ // divide, so the covered range size is 1; otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis, each
+ // overflow variable is set to 0 if its corresponding bound variable is
+ // valid, -1 if it overflowed off the bottom end, or +1 if it overflowed off
+ // the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return 0;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return 0;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
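+ // E.g. "icmp ult (lshr X, 2), 5" becomes "icmp ult (udiv X, 4), 5", which
+ // FoldICmpDivCst then turns into "icmp ult X, 20".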
+ if (Shr->getOpcode() == Instruction::AShr &&
+ (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return 0;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (TheDiv == 0)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+ }
+ return 0;
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
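+ // E.g. if the top 24 bits of an i32 %x are known to be zero,
+ // "icmp eq (trunc i32 %x to i8), 42" becomes "icmp eq i32 %x, 42".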
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
+ NewRHS |= KnownOne;
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
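+ // E.g. for i8, "icmp slt (xor %x, -128), 0" becomes "icmp sgt %x, -1".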
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (ICI.isEquality() ||
+ (!AndCST->isNegative() && RHSV.isNonNegative())) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
+ }
+ }
+
+ // If the LHS is an AND of a zext, and we have an equality compare, we can
+ // shrink the and/compare to the smaller type, eliminating the cast.
+ if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+ const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ // Make sure we don't compare the upper bits, SimplifyDemandedBits
+ // should fold the icmp to true/false in that case.
+ if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getTrunc(AndCST, Ty));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getTrunc(RHS, Ty));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
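+ // E.g. "((X lshr 3) & 15) == 2" becomes "(X & 120) == 16" below.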
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ const Type *AndTy = AndCST->getType(); // Type of the and.
+
+ // We can fold this as long as we can't shift unknown bits
+ // into the mask. This can only happen with signed shift
+ // rights, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(),
+ NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getTrue(ICI.getContext()));
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The latter is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst =
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ TypeBits-ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getLShr(RHS, ShAmt));
+ }
+ }
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Handle equality comparisons of shift-by-constant.
+ BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
+ return Res;
+ }
+
+ // Handle exact shr's.
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+ if (RHSV.isMinValue())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(1) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+ } else if (RHSV == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Sub:
+ // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+ ConstantExpr::getSub(BOp0C, RHS));
+ } else if (RHSV == 0) {
+ // Replace ((sub A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
+
+ // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0)
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(A) -> A == 0, and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ const Type *SrcTy = LHSCIOp->getType();
+ const Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSigned();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+ Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
+
+ if (isSignedCmp || !isSignedExt)
+ return 0;
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and not enter in here.
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
+ return BinaryOperator::CreateNot(Result);
+}
+
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare must be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return 0;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return 0;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return 0;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+ UI != E; ++UI) {
+ if (*UI == AddWithCst) continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but one that walks down the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ if (TI == 0 ||
+ TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Module *M = I.getParent()->getParent()->getParent();
+
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+ NewType);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
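+/// ProcessUAddIdiom - The caller has matched an unsigned-add overflow check
+/// on the add OrigAddV. Replace the add with a call to
+/// llvm.uadd.with.overflow and return its overflow bit, which the caller
+/// uses in place of the original icmp.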
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+ InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, and don't do it for
+ // vectors.
+ if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+
+ // If the add is a constant expr, then we don't bother transforming it.
+ Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+ if (OrigAdd == 0) return 0;
+
+ Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(OrigAdd);
+
+ Module *M = I.getParent()->getParent()->getParent();
+ Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+ CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0);
+
+ IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded. This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
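+ // E.g. for "icmp ugt %x, 7" (binary ...0111), the low three bits of %x are
+ // not demanded.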
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
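+ // E.g. for "icmp ult %x, 8" (binary ...1000), the low three bits of %x are
+ // not demanded.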
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Order the operands of the compare so that they are listed from most
+ // complex to least complex. This moves constants, the least complex
+ // operands, to the RHS.
+ if (getComplexity(Op0) < getComplexity(Op1)) {
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty->isIntegerTy(1)) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ unsigned BitWidth = 0;
+ if (Ty->isIntOrIntVectorTy())
+ BitWidth = Ty->getScalarSizeInBits();
+ else if (TD) // Pointers require TD info to get their size.
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // Match the following pattern, which is a common idiom when writing
+ // overflow-safe integer arithmetic functions. The source performs an
+ // addition in a wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_SLE:
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_UGE:
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ case ICmpInst::ICMP_SGE:
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+
+ // If this comparison is a normal comparison, it demands all bits; if it is
+ // a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(Op0->getType(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(Op1->getType(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
+ break;
+ }
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
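+ // For example, if the sign bit of both operands is known clear (or known
+ // set in both), 'icmp slt A, B' and 'icmp ult A, B' give the same result.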
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
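+ // A typical idiom is '%c = icmp slt i32 %a, %b' feeding
+ // '%m = select i1 %c, i32 %a, i32 %b', which computes smin(%a, %b);
+ // rewriting the compare here would obscure that pattern.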
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
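+ // For example, when the select has no other uses,
+ // 'icmp eq (select i1 %c, i32 4, i32 %x), 4' becomes
+ // 'select i1 %c, i1 true, i1 (icmp eq i32 %x, 4)'.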
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() && TD &&
+ TD->getIntPtrType(RHSC->getContext()) ==
+ LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast,
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Add)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Add)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ return new ICmpInst(Pred, Y, Z);
+ }
+
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = 0; B = 0; C = 0; D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Sub)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ BinaryOperator *SRem = NULL;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+ Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (CI->isMaxValue(true)) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
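+ // For example, with i8 operands and Cst == 12 (two trailing zero bits),
+ // Mask == 0x3f: a*12 == b*12 exactly when (a & 0x3f) == (b & 0x3f).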
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (I.isSigned())
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+ }
+ }
+ }
+
+ { Value *A, *B;
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
+ // (a+b) <u a --> llvm.uadd.with.overflow.
+ // (a+b) <u b --> llvm.uadd.with.overflow.
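+ // This catches the common C idiom 'if (a + b < a)' for detecting unsigned
+ // addition overflow.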
+ if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ (Op1 == A || Op1 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+ return R;
+
+ // a >u (a+b) --> llvm.uadd.with.overflow.
+ // b >u (a+b) --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+ (Op0 == A || Op0 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+ return R;
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(I.getContext(),
+ C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y, "tmp");
+ Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ ConstantInt *Cst1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+ m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
+
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
+
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ }
+ }
+ }
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
+ return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check that the input is converted from an integer type small enough that
+ // the conversion preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle, e.g., (fptosi (x >>s 62) to float) when x is i64.
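+ // For example, 'float' has a 24-bit mantissa, so sitofp from i24 (or uitofp
+ // from i23) or narrower converts without losing any bits.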
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts binary operators before unary
+ /// operators, which come before constants.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::FPExt: {
+ // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
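+ // For example, 'fcmp olt (fpext float %x to double), 1.0' becomes
+ // 'fcmp olt float %x, 1.0', since 1.0 round-trips to float exactly.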
+ FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+ ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+ if (!RHSF)
+ break;
+
+ // We can't convert a PPC double double.
+ if (RHSF->getType()->isPPC_FP128Ty())
+ break;
+
+ const fltSemantics *Sem;
+ // FIXME: This shouldn't be here.
+ if (LHSExt->getSrcTy()->isFloatTy())
+ Sem = &APFloat::IEEEsingle;
+ else if (LHSExt->getSrcTy()->isDoubleTy())
+ Sem = &APFloat::IEEEdouble;
+ else if (LHSExt->getSrcTy()->isFP128Ty())
+ Sem = &APFloat::IEEEquad;
+ else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+ Sem = &APFloat::x87DoubleExtended;
+ else
+ break;
+
+ bool Lossy;
+ APFloat F = RHSF->getValueAPF();
+ F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ if (!Lossy &&
+ F.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan)
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ ConstantFP::get(RHSC->getContext(), F));
+ break;
+ }
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::FSub: {
+ // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+ Value *Op;
+ if (match(LHSI, m_FNeg(m_Value(Op))))
+ return new FCmpInst(I.getSwappedPredicate(), Op,
+ ConstantExpr::getFNeg(RHSC));
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+ // fcmp (fpext x), (fpext y) -> fcmp x, y
+ if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+ if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+ if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+ return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+ RHSExt->getOperand(0));
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 000000000000..f499290fe876
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,615 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
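+ // For example, with a 64-bit TargetData, 'alloca i8, i8 %n' has its count
+ // zero-extended to i64 here.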
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = Builder->CreateIntCast(AI.getArraySize(),
+ IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+ }
+
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
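+ // For example, 'alloca i32, i32 4' becomes 'alloca [4 x i32]' plus an
+ // inbounds GEP to element zero, so existing users still see an i32*.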
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ const Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+ // Now that It points to the first non-allocation instruction in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+ New->getName()+".sub");
+ InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, GEP);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+ // Note that we only do this for alloca's, because malloc should allocate
+ // and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ }
+
+ return 0;
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
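+/// For example, a load through 'bitcast <4 x i8>* %P to i32*' becomes a load
+/// of the <4 x i8> value followed by a bitcast of that value to i32.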
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const TargetData *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
+ if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+ Idxs[1] = Idxs[0];
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if (IC.getTargetData() &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
+ // Do not allow turning this into a load of an integer, which is then
+ // casted to a pointer, this pessimizes pointer analysis a lot.
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ LoadInst *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+ NewLoad->setAlignment(LI.getAlignment());
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile loads.
+ if (LI.isVolatile()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many other simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ unsigned Align = LI.getAlignment();
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
+ LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ V1->setAlignment(Align);
+ V2->setAlignment(Align);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
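+/// For example, on a 32-bit target 'store i32 %V, (bitcast i8** %P to i32*)'
+/// becomes 'store (inttoptr i32 %V to i8*), i8** %P'.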
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (!IC.getTargetData() ||
+ SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData()->getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ const Type* CastSrcTy = SIOp0->getType();
+ const Type* CastDstTy = SrcPTy;
+ if (CastDstTy->isPointerTy()) {
+ if (CastSrcTy->isIntegerTy())
+ opcode = Instruction::IntToPtr;
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
+ opcode = Instruction::PtrToInt;
+ }
+
+ // SIOp0 is a pointer to aggregate and this is a store to the first field,
+ // emit a GEP to index into its first field.
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end());
+
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // it's only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (!SI.isVolatile()) {
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+ }
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+ TD);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen, and skip
+ // over pointer-to-pointer bitcasts, which are NOPs.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the value being stored
+ // was loaded from the very pointer we are storing to, then *this* store is
+ // dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ !SI.isVolatile())
+ return EraseInstFromFunction(SI);
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+
+ if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return EraseInstFromFunction(SI);
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions), and if the block ends with an
+ // unconditional branch, try to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // If so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
+ BasicBlock *OtherBB = 0;
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
+ return false;
+
+ P = *PI;
+ if (P != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = P;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case: there must be an instruction before the branch.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or if the
+ // alignments differ, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstNonPHI();
+ StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
+ OtherStore->isVolatile(),
+ SI.getAlignment());
+ InsertNewInstBefore(NewSI, *BBI);
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 000000000000..630a6fee3990
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,731 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// simplifyValueKnownNonZero - The specific integer value is used in a context
+/// where it is known to be non-zero. If this allows us to simplify the
+/// computation, do so and return the new operand, otherwise return null.
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+ // If V has multiple uses, then we would have to do more analysis to determine
+ // if this is safe. For example, the use could be in dynamically unreached
+ // code.
+ if (!V->hasOneUse()) return 0;
+
+ bool MadeChange = false;
+
+ // ((1 << A) >>u B) --> (1 << (A-B))
+ // Because V cannot be zero, we know that B is less than A.
+ Value *A = 0, *B = 0, *PowerOf2 = 0;
+ if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
+ m_Value(B))) &&
+ // The "1" can be any value known to be a power of 2.
+ isPowerOfTwo(PowerOf2, IC.getTargetData())) {
+ A = IC.Builder->CreateSub(A, B, "tmp");
+ return IC.Builder->CreateShl(PowerOf2, A);
+ }
+
+ // (PowerOfTwo >>u B) --> the shift must be exact, since shifting the only
+ // set bit out would make the result zero, and V is known non-zero.
+ // Similarly, the corresponding << cannot wrap (nuw).
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
+ if (I->isLogicalShift() &&
+ isPowerOfTwo(I->getOperand(0), IC.getTargetData())) {
+ // We know that this is an exact/nuw shift and that the input is a
+ // non-zero context as well.
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+ I->setOperand(0, V2);
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
+ I->setIsExact();
+ MadeChange = true;
+ }
+
+ if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
+ I->setHasNoUnsignedWrap();
+ MadeChange = true;
+ }
+ }
+
+ // TODO: Lots more we could do here:
+ // If V is a phi node, we can call this on each of its operands.
+ // "select cond, X, 0" can simplify to "X".
+
+ return MadeChange ? V : 0;
+}
+
+
+/// MultiplyOverflows - True if the product of C1 and C2 cannot be represented
+/// in an integer of this bit width.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt = LHSExt.sext(W * 2);
+ RHSExt = RHSExt.sext(W * 2);
+ } else {
+ LHSExt = LHSExt.zext(W * 2);
+ RHSExt = RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (!sign)
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+}
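
// Illustration only, not part of the patch: a minimal standalone sketch of the
// same widening idea for plain 32-bit operands instead of APInt. The helper
// name mulOverflows32 is hypothetical.
#include <cstdint>
#include <limits>

static bool mulOverflows32(int32_t A, int32_t B, bool IsSigned) {
  if (IsSigned) {
    // Sign-extend to 64 bits, multiply, and check the signed 32-bit range.
    int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
    return Wide < std::numeric_limits<int32_t>::min() ||
           Wide > std::numeric_limits<int32_t>::max();
  }
  // Zero-extend to 64 bits, multiply, and check the unsigned 32-bit range.
  uint64_t Wide = static_cast<uint64_t>(static_cast<uint32_t>(A)) *
                  static_cast<uint64_t>(static_cast<uint32_t>(B));
  return Wide > std::numeric_limits<uint32_t>::max();
}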
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
+ if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ const APInt &Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
+ }
+
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ { Value *X; ConstantInt *C1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+ Value *Add = Builder->CreateMul(X, CI, "tmp");
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
+ }
+ }
+
+ // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
+ // (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
+ // The "* (2**n)" thus becomes a potential shifting opportunity.
+ {
+ const APInt & Val = CI->getValue();
+ const APInt &PosVal = Val.abs();
+ if (Val.isNegative() && PosVal.isPowerOf2()) {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse()) {
+ ConstantInt *C1;
+ Value *Sub = 0;
+ if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
+ Sub = Builder->CreateSub(X, Y, "suba");
+ else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
+ Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+ if (Sub)
+ return
+ BinaryOperator::CreateMul(Sub,
+ ConstantInt::get(Y->getType(), PosVal));
+ }
+ }
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ // i1 mul -> i1 and.
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!I.getType()->isVectorTy()) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast, "tmp");
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
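
// Illustration only, not part of the patch: two of the integer identities the
// visitor above relies on, checked with ordinary unsigned arithmetic. The
// helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void mulIdentityChecks(uint32_t X, unsigned C, bool Flag) {
  assert(C < 32);
  // X * 2^C is the same as X << C (both modulo 2^32).
  assert(X * (uint32_t(1) << C) == (X << C));
  // X * Y, where Y is 0 or 1, is the same as X & (0 - Y).
  uint32_t Y = Flag ? 1u : 0u;
  assert(X * Y == (X & (0u - Y)));
}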
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
+ } else if (Op1C->getType()->isVectorTy()) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' without a
+ // problem. However, the select, or the condition of the select, may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and its condition each have only a single use, there is
+ // nothing else to update; exit early.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we passed the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // (X / C1) / C2 -> X / (C1*C2)
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS,
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // See if we can fold away this div instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = 0, *Z = 0;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
+ }
+
+ return 0;
+}
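
// Illustration only, not part of the patch: the two arithmetic facts used by
// the common divide transforms above, spelled out for unsigned 32-bit values.
// The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void idivIdentityChecks(uint32_t X, uint32_t C1, uint32_t C2) {
  assert(C1 != 0 && C2 != 0);
  // (X - (X % C1)) / C1 == X / C1, because X - (X % C1) is the largest
  // multiple of C1 that does not exceed X.
  assert((X - (X % C1)) / C1 == X / C1);
  // (X / C1) / C2 == X / (C1 * C2), provided the product does not wrap;
  // when it would wrap, the quotient is known to be zero, which is what the
  // MultiplyOverflows path above folds to.
  if (C2 <= UINT32_MAX / C1)
    assert((X / C1) / C2 == X / (C1 * C2));
}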
+
+/// dyn_castZExtVal - Checks if V is a zext or constant that can
+/// be truncated to Ty without losing bits.
+static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
+ if (Z->getSrcTy() == Ty)
+ return Z->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+ if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
+ return ConstantExpr::getTrunc(C, Ty);
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ BinaryOperator *LShr =
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+ }
+
+ // X udiv C, where C >= signbit
+ if (C->getValue().isNegative()) {
+ Value *IC = Builder->CreateICmpULT(Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ { const APInt *CI; Value *N;
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+ if (*CI != 1)
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
+ "tmp");
+ if (I.isExact())
+ return BinaryOperator::CreateExactLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+
+ // udiv X, (select Cond, C1, C2)
+ // --> select Cond, (lshr X, log2(C1)), (lshr X, log2(C2))
+ // where C1 and C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ // Construct the "on true" case of the select
+ Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+ I.isExact());
+
+ // Construct the "on false" case of the select
+ Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+ I.isExact());
+
+ // Construct the select instruction and return it.
+ return SelectInst::Create(Cond, TSI, FSI);
+ }
+ }
+
+ // (zext A) udiv (zext B) --> zext (A udiv B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
+ I.isExact()),
+ I.getType());
+
+ return 0;
+}
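
// Illustration only, not part of the patch: why an unsigned divide by a power
// of two is a logical shift, and why dividing by any constant with the sign
// bit set can only produce 0 or 1. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void udivIdentityChecks(uint32_t X, unsigned C) {
  assert(C < 32);
  // X udiv 2^C == X >> C.
  assert(X / (uint32_t(1) << C) == (X >> C));
  // If the divisor D has its sign bit set (D >= 2^31), the quotient is at
  // most 1, so it is exactly (X <u D) ? 0 : 1.
  uint32_t D = 0x80000000u | C;
  assert(X / D == (X < D ? 0u : 1u));
}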
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr exact X, log2(C)
+ if (I.isExact() && RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ const APFloat &Op1F = Op1C->getValueAPF();
+
+ // If the divisor has an exact multiplicative inverse we can turn the fdiv
+ // into a cheaper fmul.
+ APFloat Reciprocal(Op1F.getSemantics());
+ if (Op1F.getExactInverse(&Reciprocal)) {
+ ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Op0, RFP);
+ }
+ }
+
+ return 0;
+}
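
// Illustration only, not part of the patch: the reciprocal rewrite is only
// value-preserving when the inverse of the divisor is exactly representable,
// e.g. 1/4 but not 1/3. The helper name is hypothetical.
#include <cassert>

static void fdivReciprocalCheck(double X) {
  if (X != X)
    return;                 // skip NaN, which never compares equal to itself
  // 0.25 is the exact inverse of 4.0, so the fmul gives the same result.
  assert(X / 4.0 == X * 0.25);
  // 1.0/3.0 is rounded, so X / 3.0 and X * (1.0/3.0) may differ in the last
  // bit; divisors without an exact inverse are left alone above.
}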
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // The RHS is known non-zero.
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ I.setOperand(1, V);
+ return &I;
+ }
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (isa<ConstantInt>(Op1)) {
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ // X urem C -> X & (C-1), where C is a power of two
+ { const APInt *C;
+ if (match(Op1, m_Power2(C)))
+ return BinaryOperator::CreateAnd(Op0,
+ ConstantInt::get(I.getType(), *C-1));
+ }
+
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+
+ // urem X, (select Cond, C1, C2) -->
+ // select Cond, (and X, C1-1), (and X, C2-1)
+ // where C1 and C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+ return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+ }
+ }
+
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
+
+ return 0;
+}
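
// Illustration only, not part of the patch: the mask identity behind the
// power-of-two urem folds above. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

static void uremIdentityCheck(uint32_t X, unsigned K) {
  assert(K < 32);
  uint32_t P = uint32_t(1) << K;   // any power of two
  // X urem P == X & (P - 1).
  assert(X % P == (X & (P - 1)));
}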
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+ unsigned VWidth = RHSV->getNumOperands();
+
+ bool hasNegative = false;
+ for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ if (RHS->isNegative())
+ hasNegative = true;
+
+ if (hasNegative) {
+ std::vector<Constant *> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ if (RHS->isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ else
+ Elts[i] = RHS;
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != RHSV) {
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
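
// Illustration only, not part of the patch: srem takes its sign from the
// dividend, which is what justifies rewriting X % -Y as X % Y above. The
// helper name is hypothetical; the guards skip cases that are undefined in
// C++.
#include <cassert>
#include <climits>

static void sremIdentityCheck(int X, int Y) {
  if (Y == 0 || Y == INT_MIN)
    return;                              // division by zero, or -Y overflows
  if (X == INT_MIN && (Y == 1 || Y == -1))
    return;                              // INT_MIN % -1 overflows
  // Negating the divisor does not change the remainder.
  assert(X % Y == X % -Y);
}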
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 000000000000..37773403490c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,900 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
+/// and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ const Type *LHSType = LHSVal->getType();
+ const Type *RHSType = RHSVal->getType();
+
+ bool isNUW = false, isNSW = false, isExact = false;
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmp's of different
+ // types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+ if (isNSW) NewBinOp->setHasNoSignedWrap();
+ if (isExact) NewBinOp->setIsExact();
+ NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return NewBinOp;
+}
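
// Illustration only, not part of the patch: the same reshaping at the C level.
// Instead of merging the results of two adds that share an operand, merge the
// differing operand once and add after the join, which is what the fold above
// does to the IR. The helper names are hypothetical.
static unsigned phiBinOpBefore(bool Cond, unsigned A, unsigned B, unsigned C) {
  return Cond ? (A + B) : (A + C);   // two adds feeding the PHI
}

static unsigned phiBinOpAfter(bool Cond, unsigned A, unsigned B, unsigned C) {
  unsigned BC = Cond ? B : C;        // one PHI of the differing operand
  return A + BC;                     // a single add after the PHI
}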
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ bool AllInBounds = true;
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ AllInBounds &= GEP->isInBounds();
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
+ FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ GetElementPtrInst *NewGEP =
+ GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
+ if (AllInBounds) NewGEP->setIsInBounds();
+ NewGEP->setDebugLoc(FirstInst->getDebugLoc());
+ return NewGEP;
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for non-address taken alloca. If not address-taken already, it isn't
+ // profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+ NewLI->setDebugLoc(FirstLI->getDebugLoc());
+ return NewLI;
+}
+
+
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ const Type *CastSrcTy = 0;
+ bool isNUW = false, isNSW = false, isExact = false;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return 0;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getNumIncomingValues(),
+ PN.getName()+".in");
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
+ CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
+ PN.getType());
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (isNUW) BinOp->setHasNoUnsignedWrap();
+ if (isNSW) BinOp->setHasNoSignedWrap();
+ if (isExact) BinOp->setIsExact();
+ BinOp->setDebugLoc(FirstInst->getDebugLoc());
+ return BinOp;
+ }
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+ NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ return NewCI;
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is an ordered list of the PHIs
+ // that we need to check the uses of (to ensure they are all extracts), and
+ // PHIsInspected is a set we use to avoid revisiting PHIs.
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be able to split the
+ // critical edge which is required to insert a truncate. Because of this, we
+ // have to bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // The incoming value is an invoke defined directly in the predecessor, so
+ // the truncate would have to be inserted on a critical edge. Since we
+ // can't split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must all be self uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+ PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // have already rewritten, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
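
// Illustration only, not part of the patch: the slicing idea at the C level.
// Rather than merging one illegally wide value and extracting a piece from the
// merge, each extracted piece gets its own narrow merge. The helper names are
// hypothetical.
#include <cstdint>

static uint32_t slicedPhiBefore(bool Cond, uint64_t A, uint64_t B) {
  uint64_t Wide = Cond ? A : B;               // one wide PHI
  return static_cast<uint32_t>(Wide >> 32);   // trunc(lshr(Wide, 32))
}

static uint32_t slicedPhiAfter(bool Cond, uint64_t A, uint64_t B) {
  uint32_t AHi = static_cast<uint32_t>(A >> 32);  // extract in each predecessor
  uint32_t BHi = static_cast<uint32_t>(B >> 32);
  return Cond ? AHi : BHi;                        // a narrow PHI of the piece
}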
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ if (Value *V = SimplifyInstruction(&PN, TD))
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value in more
+ // than one of their incoming slots, even when nothing else uses it.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumIncomingVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumIncomingVals) {
+ Value *NonPhiInVal = PN.getIncomingValue(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumIncomingVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (PN.getType()->isIntegerTy() && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 000000000000..5733c20828c6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,876 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
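+
+// As an illustration of MatchSelectPattern (hypothetical IR, not part of the
+// original change):
+//   %cmp = icmp sgt i32 %a, %b
+//   %max = select i1 %cmp, i32 %a, i32 %b
+// matches as SPF_SMAX with LHS = %a and RHS = %b.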
+
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
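+
+// As an illustration of the two helpers above (hypothetical IR, not part of
+// the original change):
+//   %C = and i32 %A, %B
+//   %D = select i1 %cond, i32 %C, i32 %A
+// can become
+//   %C = select i1 %cond, i32 %B, i32 -1   ; -1 is the identity for 'and'
+//   %D = and i32 %A, %C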
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
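+/// For example (illustrative):
+///   select %c, (add %X, %Y), (add %X, %Z) --> add %X, (select %c, %Y, %Z)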
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If this is a non-volatile load or a cast from the same type,
+ // merge.
+ if (TI->isCast()) {
+ if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+ return 0;
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
+}
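+
+// For example (illustrative): isSelect01 accepts the constant pairs (0, 1),
+// (0, -1), (1, 0) and (-1, 0), but rejects pairs such as (0, 2) or (3, 5).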
+
+/// FoldSelectIntoOp - Try fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+ NewSel->takeName(TVI);
+ BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+ BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+ FalseVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(TVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+ NewSel->takeName(FVI);
+ BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+ BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+ TrueVal, NewSel);
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(FVI_BO->isExact());
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
+ }
+ return BO;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
+static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const TargetData *TD) {
+ // Trivial replacement.
+ if (V == Op)
+ return RepOp;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return 0;
+
+ // If this is a binary operator, try to simplify it with the replaced op.
+ if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
+ if (B->getOperand(0) == Op)
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ if (B->getOperand(1) == Op)
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ }
+
+ // Same for CmpInsts.
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ if (C->getOperand(0) == Op)
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ if (C->getOperand(1) == Op)
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ }
+
+ // TODO: We could hand off more cases to instsimplify here.
+
+ // If all operands are constant after substituting Op for RepOp then we can
+ // constant fold the instruction.
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+ // Build a list of all constant operands.
+ SmallVector<Constant*, 8> ConstOps;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (I->getOperand(i) == Op)
+ ConstOps.push_back(CRepOp);
+ else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+ ConstOps.push_back(COp);
+ else
+ break;
+ }
+
+ // All operands were constants, fold it.
+ if (ConstOps.size() == I->getNumOperands())
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ ConstOps.data(), ConstOps.size(), TD);
+ }
+
+ return 0;
+}
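+
+// Illustrative use of SimplifyWithOpReplaced (hypothetical IR, not part of
+// the original change): given
+//   %a = and i32 %x, 7
+//   %c = icmp eq i32 %x, 0
+//   %s = select i1 %c, i32 0, i32 %a
+// replacing %x with 0 in the false arm simplifies 'and i32 0, 7' to 0, which
+// equals the true arm, so visitSelectInstWithICmp can replace %s with %a.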
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ // X < MIN ? T : F --> F
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+ && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > MAX ? T : F --> F
+ else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+ && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // These transformations only work for selects over integers.
+ const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+        // Types do not match. Instead of calculating this with mixed types,
+        // promote all to the larger type. This enables scalar evolution to
+        // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
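+  // Illustrative instance (hypothetical): with i32 operands,
+  //   (X >s -1) ? 3 : 7  -->  ((X >>s 31) & 4) + 3
+  // since the arithmetic shift produces 0 for non-negative X and -1 for
+  // negative X.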
+ if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = NULL, *C2 = NULL;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
+ // If we have an equality comparison then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+
+ // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ if (isa<Constant>(CmpRHS)) {
+ if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+ // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+ SI.setOperand(1, CmpRHS);
+ Changed = true;
+ } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+ // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+ SI.setOperand(2, CmpRHS);
+ Changed = true;
+ }
+ }
+
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator-based analysis; punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+    // MAX(MAX(A, B), B) -> MAX(A, B)
+    // MIN(MIN(A, B), A) -> MIN(A, B)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+    // MAX(MIN(A, B), A) -> A
+    // MIN(MAX(A, B), A) -> A
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two, we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ if (!match(IC->getOperand(1), m_Zero()))
+ return 0;
+
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (LHS->getType() != SI.getType() ||
+ !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return 0;
+
+  // If both select arms are non-zero, see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = 0;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return 0;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return 0;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ Value *V = LHS;
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+ // Okay, now we know that everything is set up, we just don't know whether we
+  // have an icmp_ne or icmp_eq and whether the true or false val is the zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
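+
+// Illustrative example of foldSelectICmpAnd (hypothetical IR, not part of the
+// original change):
+//   %t = and i32 %x, 8
+//   %c = icmp eq i32 %t, 0
+//   %s = select i1 %c, i32 0, i32 2
+// folds to '%s = lshr i32 %t, 2', which yields 0 when bit 3 of %x is clear
+// and 2 when it is set.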
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ return ReplaceInstUsesWith(SI, V);
+
+ if (SI.getType()->isIntegerTy(1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+      if (C->isZero()) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1)
+ return new ZExtInst(CondVal, SI.getType());
+
+ // select C, -1, 0 -> sext C to int
+ if (FalseValC->isZero() && TrueValC->isAllOnesValue())
+ return new SExtInst(CondVal, SI.getType());
+
+ // select C, 0, 1 -> zext !C to int
+ if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new ZExtInst(NotCond, SI.getType());
+ }
+
+ // select C, 0, -1 -> sext !C to int
+ if (TrueValC->isZero() && FalseValC->isAllOnesValue()) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new SExtInst(NotCond, SI.getType());
+ }
+
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI; SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
+ } else {
+ NegVal = Builder->CreateNeg(SubOp->getOperand(1));
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Value *NewSel =
+ Builder->CreateSelect(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ if (SI.getType()->isFPOrFPVectorTy())
+ return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+ else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isIntegerTy()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+    // The true/false values have to be live in the PHI's predecessor blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition() == CondVal) {
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition() == CondVal) {
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 000000000000..811f94976f68
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,753 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+
+  // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2,
+  // because shifts by negative values (which could occur if A were negative)
+  // are undefined.
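+  // For example (illustrative): 'shl i32 %X, (srem i32 %A, 8)' becomes
+  // 'shl i32 %X, (and i32 %A, 7)' when the srem has no other users.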
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI = 0;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can always be evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = TypeWidth - CI->getZExtValue();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can always be evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+  // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0, ... just don't eliminate
+  // a signed shift.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ // ashr i32 X, 32 --> ashr i32 X, 31
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+      // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+
+ const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ // If this is oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+ TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+ // If the shifted out value is all signbits, this is a NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ // (C1 << A) << C2 -> (C1 << C2) << A
+ Constant *C1, *C2;
+ Value *A;
+ if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+ match(I.getOperand(1), m_Constant(C2)))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ // ctlz.i32(x)>>5 --> zext(x == 0)
+ // cttz.i32(x)>>5 --> zext(x == 0)
+ // ctpop.i32(x)>>5 --> zext(x == -1)
+ if ((II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop) &&
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
+ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, II->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom.
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
+ // If the left shift is just shifting out partial signbits, delete the
+ // extension.
+ if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return ReplaceInstUsesWith(I, X);
+
+ // If the input is an extension from the shifted amount value, e.g.
+ // %x = zext i8 %A to i32
+ // %y = shl i32 %x, 24
+ // %z = ashr %y, 24
+ // then turn this into "z = sext i8 A to i32".
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
+ uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
+ uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
+ if (Op1C->getZExtValue() == DestBits-SrcBits)
+ return new SExtInst(ZI->getOperand(0), ZI->getType());
+ }
+ }
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 000000000000..8fea8eb7efb5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1153 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function returns the simpler value. In all other cases, it analyzes the
+/// expression and sets KnownOne to all the bits that are known to be one in
+/// the expression and KnownZero to all the bits that are known to be zero in
+/// the expression. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
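+///
+/// Illustrative example (with placeholder IR values): if V is
+/// "or i8 %x, -16" and DemandedMask is 0x0F, the bits contributed by the
+/// constant are never used, so V can be replaced by %x in that context.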
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !VTy->isPointerTy()) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVectorTy() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clearAllBits();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
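+ // For example (hypothetical values): if %a = and i8 %x, %y has one user
+ // demanding only bit 0 and another demanding only bit 7, simplifying %a's
+ // operands for just one of those masks could break the other user.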
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all User's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero = RHSKnownZero | LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne = RHSKnownOne | LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ return InsertNewInstWith(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ InsertNewInstWith(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor =
+ BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ return InsertNewInstWith(NewXor, *I);
+ }
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
+ return 0; // vector->int or fp->int?
+
+ if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (const VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (I->getOperand(0)->getType()->isVectorTy())
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.setBit(SrcBitWidth-1);
+
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded,
+ // convert this into a zero extension.
+ if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstWith(NewCast, *I);
+ } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ KnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the and result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // A zero constant should have been simplified elsewhere; some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstWith(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ KnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this SUB to demand the most
+ // significant bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // Logical (unsigned) shift right.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), SA, I->getName());
+ return InsertNewInstWith(NewVal, *I);
+ } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X % -1 demands all the bits because we don't want to introduce
+ // INT_MIN % -1 (== undef) by accident.
+ if (Rem->isAllOnesValue())
+ break;
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
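+ // Illustrative example: for "srem i32 %x, 8", LowBits is 7 and only the
+ // low three bits of %x, plus its sign bit, can affect the result.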
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // The low bits of LHS are unchanged by the srem.
+ KnownZero = LHSKnownZero & LowBits;
+ KnownOne = LHSKnownOne & LowBits;
+
+ // If LHS is non-negative or has all low bits zero, then the upper bits
+ // are all zero.
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If LHS is negative and not all low bits are zero, then the upper bits
+ // are all one.
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+ APInt Mask2 = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
+ Depth+1);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero |= LHSKnownZero;
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
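+ // Illustrative example: with an i32 bswap and DemandedMask == 0x0000FF00,
+ // NLZ is 16 and NTZ is 8, so exactly one byte is needed: it comes from bits
+ // 16..23 of the input, and the bswap can be replaced by a lshr of 8.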
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstWith(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_8:
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero = APInt::getHighBitsSet(64, 32);
+ return 0;
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, KnownOne);
+ return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
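+/// For example (illustrative), if V is the constant vector
+/// <i32 1, i32 2, i32 3, i32 4> and only element 0 is demanded, the three
+/// undemanded lanes are reported in UndefElts and a new constant with undef
+/// in those lanes is returned.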
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ }
+
+ if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.setBit(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(CV->getOperand(i));
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCP = ConstantVector::get(Elts);
+ return NewCP != CV ? NewCP : 0;
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
+ // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+ // set to undef.
+
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
+ UndefElts = DemandedElts ^ EltMask;
+ return ConstantVector::get(Elts);
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites.
+ // In that case, demand exactly the same input elements as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clearBit(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.setBit(MaskVal);
+ else
+ RightDemanded.setBit(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.setBit(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.setBit(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.setBit(InIdx);
+ }
+
+ // Simplify the source operand based on the elements demanded of the cast.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if (VWidth < InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operations work element-wise, so each operand only needs the result's
+ // demanded elements. (div/rem would demand all elements to avoid dividing by zero.)
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
+ // Extract the element as scalars.
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstWith(New, *II);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 000000000000..ad6a8d054ee7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,575 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+using namespace llvm;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
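+/// For example (illustrative), extracting a lane of a single-use
+/// "add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>" is cheap, since it can be
+/// rewritten as a scalar add of the extracted lane of %v and 1.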
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // An insertelement is simplified to the inserted element (or removed) when
+ // this is a constant-index extract and it is a constant-index insertelement.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// getShuffleMask - Read and decode a shufflevector mask.
+/// Turn undef elements into negative values.
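+/// For example (illustrative), the mask <i32 0, i32 undef, i32 5> decodes to
+/// {0, -1, 5}.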
+static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
+ unsigned NElts = SVI->getType()->getNumElements();
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+ return std::vector<int>(NElts, 0);
+ if (isa<UndefValue>(SVI->getOperand(2)))
+ return std::vector<int>(NElts, -1);
+
+ std::vector<int> Result;
+ const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
+ Result.push_back(-1); // undef
+ else
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+ return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
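+/// For example (illustrative), if V is
+/// "insertelement <4 x i32> %w, i32 %s, i32 1", then asking for element 1
+/// returns %s, while asking for element 0 recurses into %w.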
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ const VectorType *PTy = cast<VectorType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+ return CP->getOperand(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ int InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < 0)
+ return UndefValue::get(PTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If vector val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
+ Constant *op0 = C->getOperand(0);
+ for (unsigned i = 1; i != C->getNumOperands(); ++i)
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.setBit(IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (const VectorType *VT =
+ dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ Value *Src;
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < 0)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
+ Src = SVI->getOperand(0);
+ else {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ }
+ const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Int32Ty,
+ SrcIdx, false));
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Canonicalize extractelement(cast) -> cast(extractelement)
+ // bitcasts can change the number of vector elements and they cost nothing
+ if (CI->hasOneUse() && EI.hasOneUse() &&
+ (CI->getOpcode() != Instruction::BitCast)) {
+ Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+ EI.getIndexOperand());
+ return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
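+/// For example (illustrative), if V inserts element 0 extracted from RHS into
+/// lane 2 of a four-element LHS, the mask becomes <0, 1, 4, 3> (lane 2 taken
+/// from the RHS side) and the function returns true.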
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx+NumElts);
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction if it is not null. Fill in Mask with the
+/// shuffle mask that computes V, and return the value to use as the LHS of
+/// the shuffle.
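+/// For example (illustrative), for a chain of insertelements that writes
+/// lanes extracted from some vector %src into an undef vector, RHS becomes
+/// %src, the returned LHS is undef, and Mask selects the written lanes from
+/// the RHS side.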
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(V->getType()->isVectorTy() &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef value, or inserting into an undefined position, can be
+ // folded to the original vector; remove this instruction.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to) into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return ReplaceInstUsesWith(IE, V);
+ return &IE;
+ }
+
+ return 0;
+}
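+
+// Illustrative example of the insert-of-extract transform above (the vectors
+// %A and %B and the element type are hypothetical, not taken from a test):
+//   %e = extractelement <4 x i32> %B, i32 2
+//   %r = insertelement <4 x i32> %A, i32 %e, i32 0
+// becomes, provided %r is not itself consumed by another insertelement,
+//   %r = shufflevector <4 x i32> %A, <4 x i32> %B,
+//                      <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+// where mask element 6 selects element 2 of the second operand.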
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ std::vector<int> Mask = getShuffleMask(&SVI);
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+ return 0;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return ReplaceInstUsesWith(SVI, V);
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask').
+ // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ // Remap any references to RHS to use LHS.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0)
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ else {
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
+ }
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Analyze the shuffle: is it an identity shuffle of the LHS or RHS value?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle. Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is either a splat or one of the
+ // two input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat.
+ if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ if (isa<UndefValue>(RHS)) {
+ std::vector<int> LHSMask = getShuffleMask(LHSSVI);
+
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<int> NewMask;
+ bool isSplat = true;
+ int SplatElt = -1; // undef
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int MaskElt;
+ if (Mask[i] < 0 || Mask[i] >= (int)e)
+ MaskElt = -1; // undef
+ else
+ MaskElt = LHSMask[Mask[i]];
+ // Check if this could still be a splat.
+ if (MaskElt >= 0) {
+ if (SplatElt >=0 && SplatElt != MaskElt)
+ isSplat = false;
+ SplatElt = MaskElt;
+ }
+ NewMask.push_back(MaskElt);
+ }
+
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (isSplat || NewMask == LHSMask || NewMask == Mask) {
+ std::vector<Constant*> Elts;
+ const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
+ }
+ }
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
+ }
+ }
+ }
+ }
+
+ return MadeChange ? &SVI : 0;
+}
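+
+// Illustrative example of the conservative shuffle-of-shuffle merge above
+// (names hypothetical): a splat of a splat collapses to a single splat.
+//   %s1 = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
+//   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> zeroinitializer
+// ==>
+//   %s2 = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer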
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 000000000000..32009c39ec25
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,106 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+
+ void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
+ InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+public:
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
+ }
+
+ /// AddInitialGroup - Add the specified batch of instructions in reverse
+ /// order, which should only be done when the worklist is empty and the group
+ /// has no duplicates. Because RemoveOne pops from the back of the worklist,
+ /// adding in reverse means instructions come back off in their original
+ /// order.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ WorklistMap.resize(NumEntries);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ Worklist.push_back(I);
+ }
+ }
+
+ /// Remove - Remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
+ Instruction *RemoveOne() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Add(cast<Instruction>(*UI));
+ }
+
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear; this shrinks the map if needed.
+ WorklistMap.clear();
+ }
+};
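+
+// A minimal sketch of how a driver is expected to use this class; the names
+// InstList, NumInsts and visit are hypothetical and not part of this header.
+//
+//   InstCombineWorklist WL;
+//   WL.AddInitialGroup(InstList, NumInsts); // whole function, in order
+//   while (!WL.isEmpty()) {
+//     Instruction *I = WL.RemoveOne();      // may be null if Remove()d
+//     if (I) visit(I);                      // visiting may Add() more work
+//   }
+//   WL.Zap();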
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 000000000000..ab98ef9fccf8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1677 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simpler
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
+
+char InstCombiner::ID = 0;
+INITIALIZE_PASS(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type, for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If the source is a legal integer type and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
+
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+// Commutative operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// Associative operators:
+//
+// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+// Associative and commutative operators:
+//
+// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+// if C1 and C2 are constants.
+//
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ bool Changed = false;
+
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+ }
+
+ if (I.isAssociative() && I.isCommutative()) {
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, A, B);
+ InsertNewInstWith(New, I);
+ New->takeName(Op1);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
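+
+// Illustrative example of transform 2 in the comment above (%x is a
+// hypothetical value):
+//   %t = add i32 %x, 5
+//   %r = add i32 %t, -5    ; "(A op B) op C" where "B op C" folds to 0
+// is rewritten in place as
+//   %r = add i32 %x, 0
+// which the rest of instcombine then reduces to plain %x.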
+
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win). Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+ // Factorization.
+ if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, A, V);
+ V->takeName(&I);
+ return V;
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, V, B);
+ V->takeName(&I);
+ return V;
+ }
+ }
+ }
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ return 0;
+}
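+
+// Illustrative example of the factorization path above (hypothetical values;
+// since "%b + %c" does not fold, both multiplies must be single-use):
+//   %p = mul i32 %a, %b
+//   %q = mul i32 %a, %c
+//   %r = add i32 %p, %q
+// ==>
+//   %s = add i32 %b, %c
+//   %r = mul i32 %a, %s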
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isIntegerTy())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V) const {
+ if (BinaryOperator::isFNeg(V))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isFloatingPointTy())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isIntegerTy(1)) return 0;
+
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return 0;
+ }
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
+ }
+ return 0;
+}
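+
+// Illustrative example of folding an operation into a select (hypothetical
+// values; the select must have no other uses):
+//   %s = select i1 %c, i32 4, i32 8
+//   %r = add i32 %s, 1
+// ==>
+//   %r = select i1 %c, i32 5, i32 9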
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0)
+ return 0;
+
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User != &I && !I.isIdenticalTo(User))
+ return 0;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return 0; // Itself a phi.
+ if (NonConstBB) return 0; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
+ return 0;
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation on some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != 0) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // If we are going to have to insert a new computation, do so right before the
+ // predecessor's terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We only currently try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ const Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ ReplaceInstUsesWith(*User, NewPN);
+ EraseInstFromFunction(*User);
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
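+
+// Illustrative example of folding an operation into a phi (hypothetical block
+// and value names; %p must have no other uses):
+//   %p = phi i32 [ 3, %bb1 ], [ 5, %bb2 ]
+//   %r = add i32 %p, 1
+// ==> the add is replaced by a phi of the folded constants:
+//   %r = phi i32 [ 4, %bb1 ], [ 6, %bb2 ]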
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, return null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
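+
+// Illustrative example (assuming TargetData with 64-bit pointers and 4-byte
+// i32): FindElementAtOffset([10 x i32], /*Offset=*/8, Idx) fills Idx with
+// (i64 0, i64 2) and returns i32, i.e. the element addressed by
+//   getelementptr [10 x i32]* %p, i64 0, i64 2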
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ // Eliminate unneeded casts for indices, and replace any index over a
+ // zero-sized type with zero (such an index displaces the pointer by zero).
+ if (TD) {
+ bool MadeChange = false;
+ const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ // Skip indices into struct types.
+ const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy) continue;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
+ MadeChange = true;
+ }
+
+ if ((*I)->getType() != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
+ }
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+ !Src->hasOneUse())
+ return 0;
+
+ // Note that if our source is itself a gep chain, we wait for that chain to
+ // be resolved before performing this transformation. This avoids creating
+ // a ton of code in some cases.
+ //
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !(*I)->isStructTy();
+
+ // Can we combine the two pointer arithmetic offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is zero.
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ if (StrippedPtr != PtrOp &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+ Idx.end(), GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = StrippedPtrTy->getElementType();
+ const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && SrcElTy->isArrayTy() &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note, we don't know whether Scale is
+ // signed or not. We'll use unsigned version of division/modulo
+ // operation after making sure Scale doesn't have the sign bit set.
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+ StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
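+
+// Illustrative example of combining a GEP of a GEP (hypothetical values; %P is
+// not itself a two-operand GEP, and the indices are already canonicalized to
+// the pointer-sized integer type):
+//   %A = getelementptr i32* %P, i64 %i
+//   %B = getelementptr i32* %A, i64 %j
+// ==>
+//   %A.sum = add i64 %i, %j
+//   %B     = getelementptr i32* %P, i64 %A.sum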
+
+
+
+static bool IsOnlyNullComparedAndFreed(const Value &V) {
+ for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (isFreeCall(U))
+ continue;
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+ // If we have a malloc call whose only uses are null comparisons and free
+ // calls, delete the frees, replace the comparisons with true or false as
+ // appropriate, and then delete the malloc itself.
+ if (IsOnlyNullComparedAndFreed(MI)) {
+ for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
+ UI != UE;) {
+ // We can assume that every remaining use is a free call or an icmp eq/ne
+ // to null, so the cast is safe.
+ Instruction *I = cast<Instruction>(*UI);
+
+ // Early increment here, as we're about to get rid of the user.
+ ++UI;
+
+ if (isFreeCall(I)) {
+ EraseInstFromFunction(*cast<CallInst>(I));
+ continue;
+ }
+ // Again, the cast is safe.
+ ICmpInst *C = cast<ICmpInst>(I);
+ ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ EraseInstFromFunction(*C);
+ }
+ return EraseInstFromFunction(MI);
+ }
+ return 0;
+}
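+
+// Illustrative example (hypothetical IR): when a malloc's only uses are null
+// comparisons and frees, everything folds away.
+//   %m = call noalias i8* @malloc(i64 16)
+//   %c = icmp eq i8* %m, null
+//   call void @free(i8* %m)
+// ==> %c is replaced by 'false' (an 'icmp ne' would become 'true'), and both
+// the free call and the malloc call are erased.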
+
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store through an undef pointer, since we cannot modify the
+ // CFG here.
+ Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null', delete the instruction. This can happen in STL code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return 0;
+}
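+
+// Illustrative example of the first branch canonicalization above
+// (hypothetical values and labels):
+//   %notc = xor i1 %c, true
+//   br i1 %notc, label %T, label %F
+// ==>
+//   br i1 %c, label %F, label %T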
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+ SI.setOperand(i,
+ ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+ AddRHS));
+ SI.setOperand(0, I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(V, EV.getIndices().slice(1));
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.getIndices());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.getIndices());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ ArrayRef<unsigned>(insi, inse));
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list.
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ ArrayRef<unsigned>(exti, exte));
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load.
+ // FIXME: If a load is used only by extractvalue instructions then this
+ // could be done regardless of having multiple uses.
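+    // For illustration, the rewrite is roughly:
+    //   %agg = load { i32, i32 }* %ptr
+    //   %elt = extractvalue { i32, i32 } %agg, 1
+    // -->
+    //   %gep = getelementptr inbounds { i32, i32 }* %ptr, i32 0, i32 1
+    //   %elt = load i32* %gep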
+ if (!L->isVolatile() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L->getParent(), L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
+ Indices.begin(), Indices.end());
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use ReplaceInstUsesWith().
+ return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
+ return 0;
+}
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+  // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ Constant*& FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, TD);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ BasicBlock *ReachableBB = SI->getSuccessor(i);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getSuccessor(0));
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+
+ // If I is not void type then replaceAllUsesWith undef.
+          // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+        // only has us as a predecessor (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+ Builder->SetCurrentDebugLocation(I->getDebugLoc());
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ if (!I->getDebugLoc().isUnknown())
+ Result->setDebugLoc(I->getDebugLoc());
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
+ while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ bool EverMadeChange = false;
+
+ // Lower dbg.declare intrinsics otherwise their value may be clobbered
+ // by instcombiner.
+ EverMadeChange = LowerDbgDeclare(F);
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
new file mode 100644
index 000000000000..1d31fcc4df3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -0,0 +1,117 @@
+//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+// Note that this implementation is very naive. We insert a counter for *every*
+// edge in the program, instead of using control flow information to prune the
+// number of counters inserted.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-edge-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class EdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Edge Profiler";
+ }
+ };
+}
+
+char EdgeProfiler::ID = 0;
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false)
+
+ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
+
+bool EdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ std::set<BasicBlock*> BlocksToInstrument;
+ unsigned NumEdges = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ BlocksToInstrument.insert(BB);
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters");
+ NumEdgesInserted = NumEdges;
+
+ // Instrument all of the edges...
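+  // Note: the index i below advances in the same function/block/successor
+  // order as the counting loop above, so it never exceeds the NumEdges slots
+  // reserved in Counters (blocks created by edge splitting are skipped via
+  // BlocksToInstrument).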
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Create counter for (0,entry) edge.
+ IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
+        // Okay, we have to add a counter for each outgoing edge. If the
+        // outgoing edge is not critical, don't split it; just insert the
+        // counter in the source or destination of the edge.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ // If the edge is critical, split it.
+ SplitCriticalEdge(TI, s, this);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If we
+ // only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters, false);
+ } else {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
+ }
+ }
+ }
+ }
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 000000000000..3f2c4123882d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,672 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+#include <utility>
+using namespace llvm;
+
+namespace {
+ class GCOVProfiler : public ModulePass {
+ public:
+ static char ID;
+ GCOVProfiler()
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
+ Use402Format(use402Format) {
+ assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "GCOV Profiler";
+ }
+
+ private:
+ bool runOnModule(Module &M);
+
+ // Create the GCNO files for the Module based on DebugInfo.
+ void emitGCNO(DebugInfoFinder &DIF);
+
+ // Modify the program to track transitions along edges and call into the
+ // profiling runtime to emit .gcda files when run.
+ bool emitProfileArcs(DebugInfoFinder &DIF);
+
+ // Get pointers to the functions in the runtime library.
+ Constant *getStartFileFunc();
+ Constant *getIncrementIndirectCounterFunc();
+ Constant *getEmitFunctionFunc();
+ Constant *getEmitArcsFunc();
+ Constant *getEndFileFunc();
+
+ // Create or retrieve an i32 state value that is used to represent the
+ // pred block number for certain non-trivial edges.
+ GlobalVariable *getEdgeStateValue();
+
+ // Produce a table of pointers to counters, by predecessor and successor
+ // block number.
+ GlobalVariable *buildEdgeLookupTable(Function *F,
+ GlobalVariable *Counter,
+ const UniqueVector<BasicBlock *> &Preds,
+ const UniqueVector<BasicBlock *> &Succs);
+
+ // Add the function to write out all our counters to the global destructor
+ // list.
+ void insertCounterWriteout(DebugInfoFinder &,
+ SmallVector<std::pair<GlobalVariable *,
+ MDNode *>, 8> &);
+
+ std::string mangleName(DICompileUnit CU, std::string NewStem);
+
+ bool EmitNotes;
+ bool EmitData;
+ bool Use402Format;
+
+ Module *M;
+ LLVMContext *Ctx;
+ };
+}
+
+char GCOVProfiler::ID = 0;
+INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
+ "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
+ bool Use402Format) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+}
+
+static DISubprogram findSubprogram(DIScope Scope) {
+ while (!Scope.isSubprogram()) {
+ assert(Scope.isLexicalBlock() &&
+ "Debug location not lexical block or subprogram");
+ Scope = DILexicalBlock(Scope).getContext();
+ }
+ return DISubprogram(Scope);
+}
+
+namespace {
+ class GCOVRecord {
+ protected:
+ static const char *LinesTag;
+ static const char *FunctionTag;
+ static const char *BlockTag;
+ static const char *EdgeTag;
+
+ GCOVRecord() {}
+
+ void writeBytes(const char *Bytes, int Size) {
+ os->write(Bytes, Size);
+ }
+
+ void write(uint32_t i) {
+ writeBytes(reinterpret_cast<char*>(&i), 4);
+ }
+
+    // Returns the length, measured in 4-byte words, that will be used to
+    // represent this string in a GCOV file.
+ unsigned lengthOfGCOVString(StringRef s) {
+      // A GCOV string is a length word, followed by the string itself, a NUL,
+      // and then between 0 and 3 more NULs padding out to the next 4-byte
+      // word. The length is measured in 4-byte words including padding, not
+      // in bytes of actual string.
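+      // For illustration: "hello" has length word 2 and is emitted as
+      // 'h','e','l','l','o' plus three NUL bytes, i.e. two 4-byte words.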
+ return (s.size() / 4) + 1;
+ }
+
+ void writeGCOVString(StringRef s) {
+ uint32_t Len = lengthOfGCOVString(s);
+ write(Len);
+ writeBytes(s.data(), s.size());
+
+ // Write 1 to 4 bytes of NUL padding.
+ assert((unsigned)(4 - (s.size() % 4)) > 0);
+ assert((unsigned)(4 - (s.size() % 4)) <= 4);
+ writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+ }
+
+ raw_ostream *os;
+ };
+ const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+ class GCOVFunction;
+ class GCOVBlock;
+
+ // Constructed only by requesting it from a GCOVBlock, this object stores a
+ // list of line numbers and a single filename, representing lines that belong
+ // to the block.
+ class GCOVLines : public GCOVRecord {
+ public:
+ void addLine(uint32_t Line) {
+ Lines.push_back(Line);
+ }
+
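+    // Size of this lines record in 4-byte words: one word for the zero tag,
+    // one for the filename's length word, the padded filename itself, and
+    // one word per line number (see GCOVBlock::writeOut below).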
+ uint32_t length() {
+ return lengthOfGCOVString(Filename) + 2 + Lines.size();
+ }
+
+ private:
+ friend class GCOVBlock;
+
+ GCOVLines(std::string Filename, raw_ostream *os)
+ : Filename(Filename) {
+ this->os = os;
+ }
+
+ std::string Filename;
+ SmallVector<uint32_t, 32> Lines;
+ };
+
+  // Represents a basic block in GCOV. Each block has a unique number in the
+  // function, a set of lines belonging to it, and a set of edges to other
+  // blocks.
+ class GCOVBlock : public GCOVRecord {
+ public:
+ GCOVLines &getFile(std::string Filename) {
+ GCOVLines *&Lines = LinesByFile[Filename];
+ if (!Lines) {
+ Lines = new GCOVLines(Filename, os);
+ }
+ return *Lines;
+ }
+
+ void addEdge(GCOVBlock &Successor) {
+ OutEdges.push_back(&Successor);
+ }
+
+ void writeOut() {
+ uint32_t Len = 3;
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I) {
+ Len += I->second->length();
+ }
+
+ writeBytes(LinesTag, 4);
+ write(Len);
+ write(Number);
+ for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+ E = LinesByFile.end(); I != E; ++I) {
+ write(0);
+ writeGCOVString(I->second->Filename);
+ for (int i = 0, e = I->second->Lines.size(); i != e; ++i) {
+ write(I->second->Lines[i]);
+ }
+ }
+ write(0);
+ write(0);
+ }
+
+ ~GCOVBlock() {
+ DeleteContainerSeconds(LinesByFile);
+ }
+
+ private:
+ friend class GCOVFunction;
+
+ GCOVBlock(uint32_t Number, raw_ostream *os)
+ : Number(Number) {
+ this->os = os;
+ }
+
+ uint32_t Number;
+ StringMap<GCOVLines *> LinesByFile;
+ SmallVector<GCOVBlock *, 4> OutEdges;
+ };
+
+  // A function has a unique identifier, a checksum (which we leave as zero),
+  // and a set of blocks with a map of edges between them. This is the only
+  // GCOV object users can construct; the blocks and lines will be rooted here.
+ class GCOVFunction : public GCOVRecord {
+ public:
+ GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ this->os = os;
+
+ Function *F = SP.getFunction();
+ uint32_t i = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ Blocks[BB] = new GCOVBlock(i++, os);
+ }
+ ReturnBlock = new GCOVBlock(i++, os);
+
+ writeBytes(FunctionTag, 4);
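+      // Length of this record in 4-byte words: the ident, one checksum, the
+      // two strings (each a length word plus padded payload) and the line
+      // number; a second checksum word is added below unless the 4.02 format
+      // is used.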
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ if (!Use402Format)
+ ++BlockLen; // For second checksum.
+ write(BlockLen);
+ uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
+ write(Ident);
+ write(0); // checksum #1
+ if (!Use402Format)
+ write(0); // checksum #2
+ writeGCOVString(SP.getName());
+ writeGCOVString(SP.getFilename());
+ write(SP.getLineNumber());
+ }
+
+ ~GCOVFunction() {
+ DeleteContainerSeconds(Blocks);
+ delete ReturnBlock;
+ }
+
+ GCOVBlock &getBlock(BasicBlock *BB) {
+ return *Blocks[BB];
+ }
+
+ GCOVBlock &getReturnBlock() {
+ return *ReturnBlock;
+ }
+
+ void writeOut() {
+ // Emit count of blocks.
+ writeBytes(BlockTag, 4);
+ write(Blocks.size() + 1);
+ for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+ write(0); // No flags on our blocks.
+ }
+
+ // Emit edges between blocks.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ GCOVBlock &Block = *I->second;
+ if (Block.OutEdges.empty()) continue;
+
+ writeBytes(EdgeTag, 4);
+ write(Block.OutEdges.size() * 2 + 1);
+ write(Block.Number);
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ write(Block.OutEdges[i]->Number);
+ write(0); // no flags
+ }
+ }
+
+ // Emit lines for each block.
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ I->second->writeOut();
+ }
+ }
+
+ private:
+ DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+ GCOVBlock *ReturnBlock;
+ };
+}
+
+std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+ if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+ for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+ MDNode *N = GCov->getOperand(i);
+ if (N->getNumOperands() != 2) continue;
+ MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+ MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
+ if (!GCovFile || !CompileUnit) continue;
+ if (CompileUnit == CU) {
+ SmallString<128> Filename = GCovFile->getString();
+ sys::path::replace_extension(Filename, NewStem);
+ return Filename.str();
+ }
+ }
+ }
+
+ SmallString<128> Filename = CU.getFilename();
+ sys::path::replace_extension(Filename, NewStem);
+ return sys::path::filename(Filename.str());
+}
+
+bool GCOVProfiler::runOnModule(Module &M) {
+ this->M = &M;
+ Ctx = &M.getContext();
+
+ DebugInfoFinder DIF;
+ DIF.processModule(M);
+
+ if (EmitNotes) emitGCNO(DIF);
+ if (EmitData) return emitProfileArcs(DIF);
+ return false;
+}
+
+void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
+ DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
+ for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
+ E = DIF.compile_unit_end(); I != E; ++I) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(*I);
+ raw_fd_ostream *&out = GcnoFiles[CU];
+ std::string ErrorInfo;
+ out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
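+    // The 12 header bytes, word-reversed, appear to encode the gcno magic
+    // ("gcno"), the gcov version ("404*" or "402*") and an "LLVM" stamp.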
+ if (!Use402Format)
+ out->write("oncg*404MVLL", 12);
+ else
+ out->write("oncg*402MVLL", 12);
+ }
+
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()];
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, os, Use402Format);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
+ }
+ }
+ Func.writeOut();
+ }
+
+ for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
+ I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
+ raw_fd_ostream *&out = I->second;
+ out->write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out->close();
+ delete out;
+ }
+}
+
+bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
+ if (DIF.subprogram_begin() == DIF.subprogram_end())
+ return false;
+
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+ SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+ DISubprogram SP(*SPI);
+ Function *F = SP.getFunction();
+ if (!F) continue;
+
+ unsigned Edges = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ const ArrayType *CounterTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+ GlobalVariable *Counters =
+ new GlobalVariable(*M, CounterTy, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy),
+ "__llvm_gcov_ctr", 0, false, 0);
+ CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds;
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) {
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
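+          // Two-way conditional branch: instead of splitting the edges,
+          // select counter index Edge or Edge + 1 based on the condition.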
+ Value *Sel = Builder.CreateSelect(
+ BI->getCondition(),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
+ ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters,
+ Idx.begin(), Idx.end());
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else {
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors;
+ }
+ }
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
+ buildEdgeLookupTable(F, Counters,
+ ComplexEdgePreds, ComplexEdgeSuccs);
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ const Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
+ Value *CounterPtrArray =
+ Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+ i * ComplexEdgePreds.size());
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ // clear the predecessor number
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ }
+ }
+ }
+
+ insertCounterWriteout(DIF, CountersBySP);
+
+ return true;
+}
+
+// Edges out of terminators that aren't simple branches are "complex", because
+// complex logic is required at run time to pick which counter to update.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+ Function *F,
+ GlobalVariable *Counters,
+ const UniqueVector<BasicBlock *> &Preds,
+ const UniqueVector<BasicBlock *> &Succs) {
+ // TODO: support invoke, threads. We rely on the fact that nothing can modify
+ // the whole-Module pred edge# between the time we set it and the time we next
+ // read it. Threads and invoke make this untrue.
+
+ // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
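+  // At run time the predecessor block stores its number into the edge state
+  // global and the successor block passes its row of this table to
+  // llvm_gcda_increment_indirect_counter, which is expected to bump the
+  // counter for that (pred, succ) pair.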
+ const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ const ArrayType *EdgeTableTy = ArrayType::get(
+ Int64PtrTy, Succs.size() * Preds.size());
+
+ Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
+ Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+ for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i)
+ EdgeTable[i] = NullValue;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+ for (int i = 0; i != Successors; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ IRBuilder<> builder(Succ);
+ Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge + i);
+ EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+ (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+ }
+ }
+ Edge += Successors;
+ }
+
+ ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
+ GlobalVariable *EdgeTableGV =
+ new GlobalVariable(
+ *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+ ConstantArray::get(EdgeTableTy, V),
+ "__llvm_gcda_edge_table");
+ EdgeTableGV->setUnnamedAddr(true);
+ return EdgeTableGV;
+}
+
+Constant *GCOVProfiler::getStartFileFunc() {
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Type::getInt8PtrTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
+}
+
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+ Type *Args[] = {
+ Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor
+ Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy);
+}
+
+Constant *GCOVProfiler::getEmitFunctionFunc() {
+ Type *Args[2] = {
+ Type::getInt32Ty(*Ctx), // uint32_t ident
+ Type::getInt8PtrTy(*Ctx), // const char *function_name
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+Constant *GCOVProfiler::getEmitArcsFunc() {
+ Type *Args[] = {
+ Type::getInt32Ty(*Ctx), // uint32_t num_counters
+ Type::getInt64PtrTy(*Ctx), // uint64_t *counters
+ };
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ Args, false);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
+}
+
+Constant *GCOVProfiler::getEndFileFunc() {
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
+ GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
+ if (!GV) {
+ GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::get(Type::getInt32Ty(*Ctx),
+ 0xffffffff),
+ "__llvm_gcov_global_state_pred");
+ GV->setUnnamedAddr(true);
+ }
+ return GV;
+}
+
+void GCOVProfiler::insertCounterWriteout(
+ DebugInfoFinder &DIF,
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
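+  // The generated __llvm_gcov_writeout calls llvm_gcda_start_file for each
+  // compile unit, emits every function's arc counters via
+  // llvm_gcda_emit_function/llvm_gcda_emit_arcs, ends the file, and is handed
+  // to InsertProfilingShutdownCall below so it runs at program shutdown.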
+ const FunctionType *WriteoutFTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = Function::Create(WriteoutFTy,
+ GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
+ WriteoutF->setUnnamedAddr(true);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF);
+ IRBuilder<> Builder(BB);
+
+ Constant *StartFile = getStartFileFunc();
+ Constant *EmitFunction = getEmitFunctionFunc();
+ Constant *EmitArcs = getEmitArcsFunc();
+ Constant *EndFile = getEndFileFunc();
+
+ for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
+ CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
+ DICompileUnit compile_unit(*CUI);
+ std::string FilenameGcda = mangleName(compile_unit, "gcda");
+ Builder.CreateCall(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda));
+ for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ I = CountersBySP.begin(), E = CountersBySP.end();
+ I != E; ++I) {
+ DISubprogram SP(I->second);
+ intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+ Builder.CreateCall2(EmitFunction,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+ Builder.CreateGlobalStringPtr(SP.getName()));
+
+ GlobalVariable *GV = I->first;
+ unsigned Arcs =
+ cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
+ Builder.CreateCall2(EmitArcs,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0));
+ }
+ Builder.CreateCall(EndFile);
+ }
+ Builder.CreateRetVoid();
+
+ InsertProfilingShutdownCall(WriteoutF, M);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 000000000000..71adc1ec6de0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,33 @@
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeEdgeProfilerPass(Registry);
+ initializeOptimalEdgeProfilerPass(Registry);
+ initializePathProfilerPass(Registry);
+ initializeGCOVProfilerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 000000000000..f76c77e1bdbf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,108 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+
+  /// MaximumSpanningTree - An MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+
+ // A comparing class for comparing weighted edges.
+ template <typename CT>
+ struct EdgeWeightCompare {
+ bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
+ typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ return false;
+ }
+ };
+
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
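+    /// The tree is built with Kruskal's algorithm: edges are visited in order
+    /// of decreasing weight and added unless they would connect two nodes
+    /// that are already in the same subtree (tracked with EquivalenceClasses).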
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+
+ // Create spanning tree, Forest contains a special data structure
+ // that makes checking if two nodes are already in a common (sub-)tree
+ // fast and cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+ // So we know now that the edge is not already in a subtree, so we push
+ // the edge to the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 000000000000..e09f882aa323
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,225 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class OptimalEdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(ProfileEstimatorPassID);
+ AU.addRequired<ProfileInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Optimal Edge Profiler";
+ }
+ };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+ return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+ BasicBlock* b,
+ unsigned i) {
+ DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
+ << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+  // NumEdges counts all the edges that may be instrumented. Later on it is
+  // decided which edges to actually instrument, to achieve optimal profiling.
+ // For the entry block a virtual edge (0,entry) is reserved, for each block
+ // with no successors an edge (BB,0) is reserved. These edges are necessary
+ // to calculate a truly optimal maximum spanning tree and thus an optimal
+ // instrumentation.
+ unsigned NumEdges = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ if (BB->getTerminator()->getNumSuccessors() == 0) {
+ // Reserve space for (BB,0) edge.
+ ++NumEdges;
+ } else {
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+ }
+
+  // In the profiling output a counter for each edge is reserved, but only a
+  // few are used. This is done to be able to read back in the profile without
+  // recalculating the maximum spanning tree: each edge counter that is not
+  // used is initialised with -1 to signal that it has to be derived from
+  // other edge counters when the profile info is read back in.
+
+ const Type *Int32 = Type::getInt32Ty(M.getContext());
+ const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "OptEdgeProfCounters");
+ NumEdgesInserted = 0;
+
+ std::vector<Constant*> Initializer(NumEdges);
+ Constant *Zero = ConstantInt::get(Int32, 0);
+ Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+ // Instrument all of the edges not in MST...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
+
+    // Calculate a Maximum Spanning Tree with the edge weights determined by
+    // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
+    // edges (0,entry) and (BB,0) (for blocks with no successors), and these
+    // edges also participate in the maximum spanning tree calculation.
+    // Only edges that are _not_ in the resulting MST are instrumented below;
+    // their counts can be reconstructed from the instrumented edges.
+
+ ProfileInfo::EdgeWeights ECs =
+ getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
+ std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+ MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+ std::stable_sort(MST.begin(), MST.end());
+
+    // Check whether (0,entry) is in the MST. If it is not, instrument the
+    // edge (IncrementCounterInBlock()) and initialise the counter to zero;
+    // if the edge is in the MST, the counter is initialised to -1.
+
+ BasicBlock *entry = &(F->getEntryBlock());
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, entry, i);
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+    } else {
+ Initializer[i++] = (Uncounted);
+ }
+
+    // InsertedBlocks contains all blocks that were inserted for splitting an
+    // edge; these blocks do not have to be instrumented.
+ DenseSet<BasicBlock*> InsertedBlocks;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Check if block was not inserted and thus does not have to be
+ // instrumented.
+ if (InsertedBlocks.count(BB)) continue;
+
+      // Okay, we have to add a counter for each outgoing edge not in the MST.
+      // If the outgoing edge is not critical, don't split it; just insert the
+      // counter in the source or destination of the edge. Also, if the block
+      // has no successors, the virtual edge (BB,0) is processed.
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+        } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock *Succ = TI->getSuccessor(s);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+ // If the edge is critical, split it.
+ bool wasInserted = SplitCriticalEdge(TI, s, this);
+ Succ = TI->getSuccessor(s);
+ if (wasInserted)
+ InsertedBlocks.insert(Succ);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If
+ // we only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, BB, i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ } else {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge, Succ, i);
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
+ }
+ Initializer[i++] = (Zero);
+ } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ }
+ }
+
+ // Check if the number of edges counted at first was the number of edges we
+ // considered for instrumentation.
+ assert(i == NumEdges && "the number of edges in counting array is wrong");
+
+ // Assign the now completely defined initialiser to the array.
+ Constant *init = ConstantArray::get(ATy, Initializer);
+ Counters->setInitializer(init);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 000000000000..75416637db4f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1425 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit. The paths
+// along the new DAG are enumerated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96]. The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+// removed in the following manner. For every backedge
+// v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number - The number corresponding to a specific path through a
+// Ball-Larus DAG.
+// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
+// vertices and is a tree.
+// Chord - An edge not in the spanning tree.
+//
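+// For illustration: in a diamond-shaped DAG ENTRY->A, A->B, A->C, B->D,
+// C->D, D->EXIT there are two paths; assigning edge A->C an increment of 1
+// and every other edge 0 numbers them 0 and 1.
+//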
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+// Thomas Ball. "Efficiently Counting Program Events with Support for
+// On-line queries."
+// ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
+// September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/DerivedTypes.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with members used by the
+// instrumentation algorithms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+ // Creates a new BLInstrumentationNode from a BasicBlock.
+ BLInstrumentationNode(BasicBlock* BB);
+
+ // Gets/sets the Value corresponding to the pathNumber register,
+ // constant or phinode. Used by the instrumentation code to remember
+ // path number Values.
+ Value* getStartingPathNumber();
+ void setStartingPathNumber(Value* pathNumber);
+
+ Value* getEndingPathNumber();
+ void setEndingPathNumber(Value* pathNumber);
+
+ // Get/set the PHINode Instruction for this node.
+ PHINode* getPathPHI();
+ void setPathPHI(PHINode* pathPHI);
+
+private:
+
+ Value* _startingPathNumber; // The Value for the current pathNumber.
+ Value* _endingPathNumber; // The Value for the current pathNumber.
+ PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+ BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target);
+
+ // Sets the target node of this edge. Required to split edges.
+ void setTarget(BallLarusNode* node);
+
+ // Get/set whether edge is in the spanning tree.
+ bool isInSpanningTree() const;
+ void setIsInSpanningTree(bool isInSpanningTree);
+
+ // Get/set whether this edge will be instrumented with a path number
+ // initialization.
+ bool isInitialization() const;
+ void setIsInitialization(bool isInitialization);
+
+ // Get/set whether this edge will be instrumented with a path counter
+ // increment. Notice this is incrementing the path counter
+ // corresponding to the path number register. The path number
+ // increment is determined by getIncrement().
+ bool isCounterIncrement() const;
+ void setIsCounterIncrement(bool isCounterIncrement);
+
+ // Get/set the path number increment that this edge will be instrumented
+ // with. This is distinct from the path counter increment and the
+ // weight. The counter increment counts the number of executions of
+ // some path, whereas the path number keeps track of which path number
+ // the program is on.
+ long getIncrement() const;
+ void setIncrement(long increment);
+
+ // Get/set whether the edge has been instrumented.
+ bool hasInstrumentation();
+ void setHasInstrumentation(bool hasInstrumentation);
+
+ // Returns the successor number of this edge in the source.
+ unsigned getSuccessorNumber();
+
+private:
+ // The increment that the code will be instrumented with.
+ long long _increment;
+
+ // Whether this edge is in the spanning tree.
+ bool _isInSpanningTree;
+
+ // Whether this edge is an initialization of the path number.
+ bool _isInitialization;
+
+ // Whether this edge is a path counter increment.
+ bool _isCounterIncrement;
+
+ // Whether this edge has been instrumented.
+ bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+ BLInstrumentationDag(Function &F);
+
+ // Returns the Exit->Root edge. This edge is required for creating
+ // directed cycles in the algorithm for moving instrumentation off of
+ // the spanning tree
+ BallLarusEdge* getExitRootEdge();
+
+ // Returns an array of phony edges which mark those nodes
+ // with function calls
+ BLEdgeVector getCallPhonyEdges();
+
+ // Gets/sets the path counter array
+ GlobalVariable* getCounterArray();
+ void setCounterArray(GlobalVariable* c);
+
+ // Calculates the increments for the chords, thereby removing
+ // instrumentation from the spanning tree edges. Implementation is based
+ // on the algorithm in Figure 4 of [Ball94]
+ void calculateChordIncrements();
+
+ // Updates the state when an edge has been split
+ void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+ // Calculates a spanning tree of the DAG ignoring cycles. Whichever
+ // edges are in the spanning tree will not be instrumented, but this
+ // implementation does not try to minimize the instrumentation overhead
+ // by trying to find hot edges.
+ void calculateSpanningTree();
+
+ // Pushes initialization further down in order to group the first
+ // increment and initialization.
+ void pushInitialization();
+
+ // Pushes the path counter increments up in order to group the last path
+ // number increment.
+ void pushCounters();
+
+ // Removes phony edges from the successor list of the source, and the
+ // predecessor list of the target.
+ void unlinkPhony();
+
+ // Generate dot graph for the function
+ void generateDotGraph();
+
+protected:
+ // BLInstrumentationDag creates BLInstrumentationNode objects in this
+ // method overriding the creation of BallLarusNode objects.
+ //
+ // Allows subclasses to determine which type of Node is created.
+ // Override this method to produce subclasses of BallLarusNode if
+ // necessary.
+ virtual BallLarusNode* createNode(BasicBlock* BB);
+
+ // BLInstrumentationDag creates BLInstrumentationEdges.
+ //
+ // Allows subclasses to determine which type of Edge is created.
+ // Override this method to produce subclasses of BallLarusEdge if
+ // necessary. Parameters source and target will have been created by
+ // createNode and can be cast to the subclass of BallLarusNode*
+ // returned by createNode.
+ virtual BallLarusEdge* createEdge(
+ BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+ BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+ GlobalVariable* _counterArray; // Array to store path counters
+
+ // Removes the edge from the appropriate predecessor and successor lists.
+ void unlinkEdge(BallLarusEdge* edge);
+
+ // Makes an edge part of the spanning tree.
+ void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+ // Pushes initialization and calls itself recursively.
+ void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+ // Pushes path counter increments up recursively.
+ void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+ // Depth-first algorithm for determining the chord increments.
+ void calculateChordIncrementsDfs(
+ long weight, BallLarusNode* v, BallLarusEdge* e);
+
+ // Determines the relative direction of two edges.
+ int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which instruments path profiling instructions
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+ // Current context for multi threading support.
+ LLVMContext* Context;
+
+ // Which function are we currently instrumenting
+ unsigned currentFunctionNumber;
+
+ // The function prototype in the profiling runtime for incrementing a
+ // single path counter in a hash table.
+ Constant* llvmIncrementHashFunction;
+ Constant* llvmDecrementHashFunction;
+
+ // Instruments each function with path profiling. 'main' is instrumented
+ // with code to save the profile to disk.
+ bool runOnModule(Module &M);
+
+ // Analyzes the function for Ball-Larus path profiling, and inserts code.
+ void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+ // Creates an increment constant representing incr.
+ ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+ // Creates an increment constant representing the value in
+ // edge->getIncrement().
+ ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+ // Finds the insertion point after pathNumber in block. PathNumber may
+ // be NULL.
+ BasicBlock::iterator getInsertionPoint(
+ BasicBlock* block, Value* pathNumber);
+
+ // Inserts source's pathNumber Value* into target. Target may or may not
+ // have multiple predecessors, and may or may not have its phiNode
+ // initialized.
+ void pushValueIntoNode(
+ BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+ // Inserts source's pathNumber Value* into the appropriate slot of
+ // target's phiNode.
+ void pushValueIntoPHI(
+ BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+ // The Value* in node, oldVal, is updated with a Value* corresponding to
+ // oldVal + addition.
+ void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+ bool atBeginning);
+
+ // Creates a counter increment in the given node. The Value* in node is
+ // taken as the index into a hash table.
+ void insertCounterIncrement(
+ Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment = true);
+
+ // A PHINode is created in the node, and its values initialized to -1U.
+ void preparePHI(BLInstrumentationNode* node);
+
+ // Inserts instrumentation for the given edge
+ //
+ // Pre: The edge's source node has pathNumber set if the edge has a
+ // non-zero path number increment.
+ //
+ // Post: Edge's target node has a pathNumber set to the path number Value
+ // corresponding to the value of the path register after edge's
+ // execution.
+ void insertInstrumentationStartingAt(
+ BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag);
+
+ // If this edge is a critical edge, then inserts a node at this edge.
+ // This edge becomes the first edge, and a new BallLarusEdge is created.
+ bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+ // Inserts instrumentation according to the marked edges in dag. Phony
+ // edges must be unlinked from the DAG, but accessible from the
+ // backedges. Dag must have initializations, path number increments, and
+ // counter increments present.
+ //
+ // Counter storage is created here.
+ void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfiler() : ModulePass(ID) {
+ initializePathProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Path Profiler";
+ }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+ cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+ "Insert instrumentation for Ball-Larus path profiling",
+ false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+ class PathProfilingFunctionTable {};
+
+ // Type for global array storing references to hashes or arrays
+ template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+ xcompile> {
+ public:
+ static const StructType *get(LLVMContext& C) {
+ return( StructType::get(
+ TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+ TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+ NULL));
+ }
+ };
+
+ typedef TypeBuilder<PathProfilingFunctionTable, true>
+ ftEntryTypeBuilder;
+
+ // BallLarusEdge << operator overloading
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge)
+ LLVM_ATTRIBUTE_USED;
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge) {
+ os << "[" << edge.getSource()->getName() << " -> "
+ << edge.getTarget()->getName() << "] init: "
+ << (edge.isInitialization() ? "yes" : "no")
+ << " incr:" << edge.getIncrement() << " cinc: "
+ << (edge.isCounterIncrement() ? "yes" : "no");
+ return(os);
+ }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+ BallLarusNode(BB),
+ _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target)
+ : BallLarusEdge(source, target, 0),
+ _increment(0), _isInSpanningTree(false), _isInitialization(false),
+ _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge. Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+ _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+ return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+ _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+ return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+ _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment. Notice this is incrementing the path counter
+// corresponding to the path number register. The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+ return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+ _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with. This is distinct from the path counter increment and the
+// weight.  The counter increment counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+ return(_increment);
+}
+
+// Set whether this edge will be instrumented with a path number
+// increment.
+void BLInstrumentationEdge::setIncrement(long increment) {
+ _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+ return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+ _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+ BallLarusNode* sourceNode = getSource();
+ BallLarusNode* targetNode = getTarget();
+ BasicBlock* source = sourceNode->getBlock();
+ BasicBlock* target = targetNode->getBlock();
+
+ if(source == NULL || target == NULL)
+ return(0);
+
+ TerminatorInst* terminator = source->getTerminator();
+
+ unsigned i;
+ for(i=0; i < terminator->getNumSuccessors(); i++) {
+ if(terminator->getSuccessor(i) == target)
+ break;
+ }
+
+ return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+ _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+ BLEdgeIterator erEdge = getExit()->succBegin();
+ return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+ BLEdgeVector callEdges;
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++ ) {
+ if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+ callEdges.push_back(*edge);
+ }
+
+ return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+ return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+ _counterArray = c;
+}
+
+// Calculates the increments for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94]
+void BLInstrumentationDag::calculateChordIncrements() {
+ calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+ BLInstrumentationEdge* chord;
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ chord = (BLInstrumentationEdge*) *chordEdge;
+ chord->setIncrement(chord->getIncrement() + chord->getWeight());
+ }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+ BasicBlock* newBlock) {
+ BallLarusNode* oldTarget = formerEdge->getTarget();
+ BallLarusNode* newNode = addNode(newBlock);
+ formerEdge->setTarget(newNode);
+ newNode->addPredEdge(formerEdge);
+
+ DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
+
+ oldTarget->removePredEdge(formerEdge);
+ BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+ if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+ formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+ newEdge->setType(formerEdge->getType());
+ newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+ newEdge->setPhonyExit(formerEdge->getPhonyExit());
+ formerEdge->setType(BallLarusEdge::NORMAL);
+ formerEdge->setPhonyRoot(NULL);
+ formerEdge->setPhonyExit(NULL);
+ }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles. Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+ std::stack<BallLarusNode*> dfsStack;
+
+ for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+ nodeIt != end; nodeIt++) {
+ (*nodeIt)->setColor(BallLarusNode::WHITE);
+ }
+
+ dfsStack.push(getRoot());
+ while(dfsStack.size() > 0) {
+ BallLarusNode* node = dfsStack.top();
+ dfsStack.pop();
+
+ if(node->getColor() == BallLarusNode::WHITE)
+ continue;
+
+ BallLarusNode* nextNode;
+ bool forward = true;
+ BLEdgeIterator succEnd = node->succEnd();
+
+ node->setColor(BallLarusNode::WHITE);
+ // first iterate over successors then predecessors
+ for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+ edge != predEnd; edge++) {
+ if(edge == succEnd) {
+ edge = node->predBegin();
+ forward = false;
+ }
+
+ // Ignore split edges
+ if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+ if(nextNode->getColor() != BallLarusNode::WHITE) {
+ nextNode->setColor(BallLarusNode::WHITE);
+ makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+ }
+ }
+ }
+
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+    // safe since createEdge is overridden
+ if(!instEdge->isInSpanningTree() && (*edge)->getType()
+ != BallLarusEdge::SPLITEDGE)
+ _chordEdges.push_back(instEdge);
+ }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsInitialization(true);
+ pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+ BallLarusEdge* edge;
+
+ for(BLEdgeIterator next = _edges.begin(),
+ end = _edges.end(); next != end; next++) {
+ edge = (*next);
+
+ if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+ unlinkEdge(edge);
+ }
+ }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+ std::string errorInfo;
+ std::string functionName = getFunction().getNameStr();
+ std::string filename = "pathdag." + functionName + ".dot";
+
+ DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+ raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+ if (!errorInfo.empty()) {
+ errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+ errs() << "\n";
+ return;
+ }
+
+ dotFile << "digraph " << functionName << " {\n";
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ std::string sourceName = (*edge)->getSource()->getName();
+ std::string targetName = (*edge)->getTarget()->getName();
+
+ dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+ << targetName.c_str() << "\" ";
+
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+ switch( (*edge)->getType() ) {
+ case BallLarusEdge::NORMAL:
+ dotFile << "[label=" << inc << "] [color=black];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE:
+ dotFile << "[color=cyan];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE_PHONY:
+ dotFile << "[label=" << inc
+ << "] [color=blue];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE:
+ dotFile << "[color=violet];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=red];\n";
+ break;
+
+ case BallLarusEdge::CALLEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=green];\n";
+ break;
+ }
+ }
+
+ dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+ return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target, unsigned edgeNumber) {
+ // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+  // is overridden to produce BLInstrumentationNode.
+ return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+ (BLInstrumentationNode*)target) );
+}
+
+// Gets the Value corresponding to the pathNumber register, constant,
+// or phinode. Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+ return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber. Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
+ pathNumber->getNameStr() : "unused") << "\n");
+ _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+ return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " EPN-" << getName() << " <-- "
+ << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node. Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+ return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node. Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+ _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+ if(edge == getExitRootEdge())
+ DEBUG(dbgs() << " Removing exit->root edge\n");
+
+ edge->getSource()->removeSuccEdge(edge);
+ edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+ edge->setIsInSpanningTree(true);
+ _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+ BLInstrumentationEdge* edge) {
+ BallLarusNode* target;
+
+ target = edge->getTarget();
+ if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+ return;
+ } else {
+ for(BLEdgeIterator next = target->succBegin(),
+ end = target->succEnd(); next != end; next++) {
+ BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+ // Skip split edges
+ if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ intoEdge->setIncrement(intoEdge->getIncrement() +
+ edge->getIncrement());
+ intoEdge->setIsInitialization(true);
+ pushInitializationFromEdge(intoEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsInitialization(false);
+ }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+ BallLarusNode* source;
+
+ source = edge->getSource();
+ if(source->getNumberSuccEdges() > 1 || source == getRoot()
+ || edge->isInitialization()) {
+ return;
+ } else {
+ for(BLEdgeIterator previous = source->predBegin(),
+ end = source->predEnd(); previous != end; previous++) {
+ BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+ // Skip split edges
+ if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ fromEdge->setIncrement(fromEdge->getIncrement() +
+ edge->getIncrement());
+ fromEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(fromEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsCounterIncrement(false);
+ }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+ BallLarusNode* v, BallLarusEdge* e) {
+ BLInstrumentationEdge* f;
+
+ for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+ end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+ f = (BLInstrumentationEdge*) *treeEdge;
+ if(e != f && v == f->getTarget()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getSource(), f);
+ }
+ if(e != f && v == f->getSource()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getTarget(), f);
+ }
+ }
+
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ f = (BLInstrumentationEdge*) *chordEdge;
+ if(v == f->getSource() || v == f->getTarget()) {
+ f->setIncrement(f->getIncrement() +
+ calculateChordIncrementsDir(e,f)*weight);
+ }
+ }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+ BallLarusEdge* f) {
+ if( e == NULL)
+ return(1);
+ else if(e->getSource() == f->getTarget()
+ || e->getTarget() == f->getSource())
+ return(1);
+
+ return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+ int bitsize) {
+ return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+ BLInstrumentationEdge* edge) {
+ return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block. PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+ pathNumber) {
+ if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+ || (((Instruction*)(pathNumber))->getParent()) != block) {
+ return(block->getFirstNonPHI());
+ } else {
+ Instruction* pathNumberInst = (Instruction*) (pathNumber);
+ BasicBlock::iterator insertPoint;
+ BasicBlock::iterator end = block->end();
+
+ for(insertPoint = block->begin();
+ insertPoint != end; insertPoint++) {
+ Instruction* insertInst = &(*insertPoint);
+
+ if(insertInst == pathNumberInst)
+ return(++insertPoint);
+ }
+
+ return(insertPoint);
+ }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+ BasicBlock* block = node->getBlock();
+ BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ pred_iterator PB = pred_begin(node->getBlock()),
+ PE = pred_end(node->getBlock());
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
+ std::distance(PB, PE), "pathNumber",
+ insertPoint );
+ node->setPathPHI(phi);
+ node->setStartingPathNumber(phi);
+ node->setEndingPathNumber(phi);
+
+ for(pred_iterator predIt = PB; predIt != PE; predIt++) {
+ BasicBlock* pred = (*predIt);
+
+ if(pred != NULL)
+ phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+ }
+}
+
+// Inserts source's pathNumber Value* into target. Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initialized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+ BLInstrumentationNode* target) {
+ if(target->getBlock() == NULL)
+ return;
+
+
+ if(target->getNumberPredEdges() <= 1) {
+ assert(target->getStartingPathNumber() == NULL &&
+ "Target already has path number");
+ target->setStartingPathNumber(source->getEndingPathNumber());
+ target->setEndingPathNumber(source->getEndingPathNumber());
+ DEBUG(dbgs() << " Passing path number"
+ << (source->getEndingPathNumber() ? "" : " (null)")
+ << " value through.\n");
+ } else {
+ if(target->getPathPHI() == NULL) {
+ DEBUG(dbgs() << " Initializing PHI node for block '"
+ << target->getName() << "'\n");
+ preparePHI(target);
+ }
+ pushValueIntoPHI(target, source);
+ DEBUG(dbgs() << " Passing number value into PHI for block '"
+ << target->getName() << "'\n");
+ }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+ BLInstrumentationNode* source) {
+ PHINode* phi = target->getPathPHI();
+ assert(phi != NULL && " Tried to push value into node with PHI, but node"
+ " actually had no PHI.");
+ phi->removeIncomingValue(source->getBlock(), false);
+ phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* corresponding to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+ Value* addition, bool atBeginning) {
+ BasicBlock* block = node->getBlock();
+ assert(node->getStartingPathNumber() != NULL);
+ assert(node->getEndingPathNumber() != NULL);
+
+ BasicBlock::iterator insertPoint;
+
+ if( atBeginning )
+ insertPoint = block->getFirstNonPHI();
+ else
+ insertPoint = block->getTerminator();
+
+ DEBUG(errs() << " Creating addition instruction.\n");
+ Value* newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ addition, "pathNumber", insertPoint);
+
+ node->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node. The Value* in node is
+// taken as the index into an array or hash table. The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment) {
+ // Counter increment for array
+ if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ // Get pointer to the array location
+ std::vector<Value*> gepIndices(2);
+ gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ gepIndices[1] = incValue;
+
+ GetElementPtrInst* pcPointer =
+ GetElementPtrInst::Create(dag->getCounterArray(),
+ gepIndices.begin(), gepIndices.end(),
+ "counterInc", insertPoint);
+
+ // Load from the array - call it oldPC
+ LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+ // Test to see whether adding 1 will overflow the counter
+ ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+ createIncrementConstant(0xffffffff, 32),
+ "isMax");
+
+ // Select increment for the path counter based on overflow
+ SelectInst* inc =
+ SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+ createIncrementConstant(0,32),
+ "pathInc", insertPoint);
+
+ // newPc = oldPc + inc
+ BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+ oldPc, inc, "newPC",
+ insertPoint);
+
+ // Store back in to the array
+ new StoreInst(newPc, pcPointer, insertPoint);
+ } else { // Counter increment for hash
+ std::vector<Value*> args(2);
+ args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+ currentFunctionNumber);
+ args[1] = incValue;
+
+ CallInst::Create(
+ increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+ args, "", insertPoint);
+ }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if the edge has a non-zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ // Mark the edge as instrumented
+ edge->setHasInstrumentation(true);
+ DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+ // create a new node for this edge's instrumentation
+ splitCritical(edge, dag);
+
+ BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+ BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+ BLInstrumentationNode* instrumentNode;
+ BLInstrumentationNode* nextSourceNode;
+
+ bool atBeginning = false;
+
+ // Source node has only 1 successor so any information can be simply
+ // inserted in to it without splitting
+ if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << sourceNode->getName() << " (at end)\n");
+ instrumentNode = sourceNode;
+ nextSourceNode = targetNode; // ... since we never made any new nodes
+ }
+
+ // The target node only has one predecessor, so we can safely insert edge
+ // instrumentation into it. If there was splitting, it must have been
+ // successful.
+ else if( targetNode->getNumberPredEdges() == 1 ) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << targetNode->getName() << " (at beginning)\n");
+ pushValueIntoNode(sourceNode, targetNode);
+ instrumentNode = targetNode;
+ nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+ atBeginning = true;
+ }
+
+ // Somehow, splitting must have failed.
+ else {
+ errs() << "Instrumenting could not split a critical edge.\n";
+ DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
+ return;
+ }
+
+ // Insert instrumentation if this is a back or split edge
+ if( edge->getType() == BallLarusEdge::BACKEDGE ||
+ edge->getType() == BallLarusEdge::SPLITEDGE ) {
+ BLInstrumentationEdge* top =
+ (BLInstrumentationEdge*) edge->getPhonyRoot();
+ BLInstrumentationEdge* bottom =
+ (BLInstrumentationEdge*) edge->getPhonyExit();
+
+ assert( top->isInitialization() && " Top phony edge did not"
+ " contain a path number initialization.");
+ assert( bottom->isCounterIncrement() && " Bottom phony edge"
+ " did not contain a path counter increment.");
+
+ // split edge has yet to be initialized
+ if( !instrumentNode->getEndingPathNumber() ) {
+ instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+ instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+ }
+
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ // add information from the bottom edge, if it exists
+ if( bottom->getIncrement() ) {
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(bottom),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+ }
+
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+ instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+ // Check for path counter increments
+ if( top->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ instrumentNode->getBlock()->getTerminator(),dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Insert instrumentation if this is a normal edge
+ else {
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ if( edge->isInitialization() ) { // initialize path number
+ instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+ } else if( edge->getIncrement() ) {// increment path number
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(edge),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(newpn);
+ }
+
+ // Check for path counter increments
+ if( edge->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Push it along
+ if (nextSourceNode && instrumentNode->getEndingPathNumber())
+ pushValueIntoNode(instrumentNode, nextSourceNode);
+
+ // Add all the successors
+ for( BLEdgeIterator next = targetNode->succBegin(),
+ end = targetNode->succEnd(); next != end; next++ ) {
+ // So long as it is un-instrumented, add it to the list
+ if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+ insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+ else
+ DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
+ << " already instrumented.\n");
+ }
+}
+
+// Inserts instrumentation according to the marked edges in dag. Phony edges
+// must be unlinked from the DAG, but accessible from the backedges. Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+ BLInstrumentationDag& dag, Module &M) {
+
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) dag.getExitRootEdge();
+ insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+ // Iterate through each call edge and apply the appropriate hash increment
+ // and decrement functions
+ BLEdgeVector callEdges = dag.getCallPhonyEdges();
+ for( BLEdgeIterator edge = callEdges.begin(),
+ end = callEdges.end(); edge != end; edge++ ) {
+ BLInstrumentationNode* node =
+ (BLInstrumentationNode*)(*edge)->getSource();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+ // Find the first function call
+ while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+ insertPoint++;
+
+ DEBUG(dbgs() << "\nInstrumenting method call block '"
+ << node->getBlock()->getNameStr() << "'\n");
+ DEBUG(dbgs() << " Path number initialized: "
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+ Value* newpn;
+ if( node->getStartingPathNumber() ) {
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+ if ( inc )
+ newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ createIncrementConstant(inc,32),
+ "pathNumber", insertPoint);
+ else
+ newpn = node->getStartingPathNumber();
+ } else {
+ newpn = (Value*)createIncrementConstant(
+ ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+ }
+
+ insertCounterIncrement(newpn, insertPoint, &dag);
+ insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+ &dag, false);
+ }
+}
+
+// Analyzes one function and inserts the path profiling instrumentation for it.
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+ Function &F, Module &M) {
+ // Build DAG from CFG
+ BLInstrumentationDag dag = BLInstrumentationDag(F);
+ dag.init();
+
+ // give each path a unique integer value
+ dag.calculatePathNumbers();
+
+ // modify path increments to increase the efficiency
+ // of instrumentation
+ dag.calculateSpanningTree();
+ dag.calculateChordIncrements();
+ dag.pushInitialization();
+ dag.pushCounters();
+ dag.unlinkPhony();
+
+ // potentially generate .dot graph for the dag
+ if (DotPathDag)
+ dag.generateDotGraph ();
+
+ // Should we store the information in an array or hash
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ dag.getNumberOfPaths());
+
+ dag.setCounterArray(new GlobalVariable(M, t, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(t), ""));
+ }
+
+ insertInstrumentation(dag, M);
+
+ // Add to global function reference table
+ unsigned type;
+ const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+ type = ProfilingArray;
+ else
+ type = ProfilingHash;
+
+ std::vector<Constant*> entryArray(3);
+ entryArray[0] = createIncrementConstant(type,32);
+ entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+ entryArray[2] = dag.getCounterArray() ?
+ ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+ Constant::getNullValue(voidPtr);
+
+ const StructType* at = ftEntryTypeBuilder::get(*Context);
+ ConstantStruct* functionEntry =
+ (ConstantStruct*)ConstantStruct::get(at, entryArray);
+ ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changes
+#define PRINT_MODULE dbgs() << \
+ "\n\n============= MODULE BEGIN ===============\n" << M << \
+ "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+ Context = &M.getContext();
+
+ DEBUG(dbgs()
+ << "****************************************\n"
+ << "****************************************\n"
+ << "** **\n"
+ << "** PATH PROFILING INSTRUMENTATION **\n"
+ << "** **\n"
+ << "****************************************\n"
+ << "****************************************\n");
+
+ // No main, no instrumentation!
+ Function *Main = M.getFunction("main");
+
+  // Using Fortran? ... this kind of works
+ if (!Main)
+ Main = M.getFunction("MAIN__");
+
+ if (!Main) {
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
+ return false;
+ }
+
+ llvmIncrementHashFunction = M.getOrInsertFunction(
+ "llvm_increment_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ llvmDecrementHashFunction = M.getOrInsertFunction(
+ "llvm_decrement_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ std::vector<Constant*> ftInit;
+ unsigned functionNumber = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+
+ DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ functionNumber++;
+
+ // set function number
+ currentFunctionNumber = functionNumber;
+ runOnFunction(ftInit, *F, M);
+ }
+
+ const Type *t = ftEntryTypeBuilder::get(*Context);
+ const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+ DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+ GlobalVariable* functionTable =
+ new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+ ftInitConstant, "functionPathTable");
+ const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+ PointerType::getUnqual(eltType));
+
+ DEBUG(PRINT_MODULE);
+
+ return true;
+}
+
+// If this edge is a critical edge, then inserts a node at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ unsigned succNum = edge->getSuccessorNumber();
+ BallLarusNode* sourceNode = edge->getSource();
+ BallLarusNode* targetNode = edge->getTarget();
+ BasicBlock* sourceBlock = sourceNode->getBlock();
+ BasicBlock* targetBlock = targetNode->getBlock();
+
+ if(sourceBlock == NULL || targetBlock == NULL
+ || sourceNode->getNumberSuccEdges() <= 1
+ || targetNode->getNumberPredEdges() == 1 ) {
+ return(false);
+ }
+
+ TerminatorInst* terminator = sourceBlock->getTerminator();
+
+ if( SplitCriticalEdge(terminator, succNum, this, false)) {
+ BasicBlock* newBlock = terminator->getSuccessor(succNum);
+ dag->splitUpdate(edge, newBlock);
+ return(true);
+ } else
+ return(false);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
new file mode 100644
index 000000000000..445a5b6f6074
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -0,0 +1,170 @@
+//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+
+void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Array,
+ PointerType *arrayType) {
+ LLVMContext &Context = MainFn->getContext();
+ const Type *ArgVTy =
+ PointerType::getUnqual(Type::getInt8PtrTy(Context));
+ const PointerType *UIntPtr = arrayType ? arrayType :
+ Type::getInt32PtrTy(Context);
+ Module &M = *MainFn->getParent();
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ ArgVTy, UIntPtr,
+ Type::getInt32Ty(Context),
+ (Type *)0);
+
+ // This could force argc and argv into programs that wouldn't otherwise have
+ // them, but instead we just pass null values in.
+ std::vector<Value*> Args(4);
+ Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Args[1] = Constant::getNullValue(ArgVTy);
+
+ // Skip over any allocas in the entry block.
+ BasicBlock *Entry = MainFn->begin();
+ BasicBlock::iterator InsertPos = Entry->begin();
+ while (isa<AllocaInst>(InsertPos)) ++InsertPos;
+
+ std::vector<Constant*> GEPIndices(2,
+ Constant::getNullValue(Type::getInt32Ty(Context)));
+ unsigned NumElements = 0;
+ if (Array) {
+ Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
+ GEPIndices.size());
+ NumElements =
+ cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
+ } else {
+ // If this profiling instrumentation doesn't have a constant array, just
+ // pass null.
+ Args[2] = ConstantPointerNull::get(UIntPtr);
+ }
+ Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
+
+ CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
+
+ // If argc or argv are not available in main, just pass null values in.
+ Function::arg_iterator AI;
+ switch (MainFn->arg_size()) {
+ default:
+ case 2:
+ AI = MainFn->arg_begin(); ++AI;
+ if (AI->getType() != ArgVTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ false);
+ InitCall->setArgOperand(1,
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ } else {
+ InitCall->setArgOperand(1, AI);
+ }
+ /* FALL THROUGH */
+
+ case 1:
+ AI = MainFn->arg_begin();
+ // If the program looked at argc, have it look at the return value of the
+ // init call instead.
+ if (!AI->getType()->isIntegerTy(32)) {
+ Instruction::CastOps opcode;
+ if (!AI->use_empty()) {
+ opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
+ AI->replaceAllUsesWith(
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
+ }
+ opcode = CastInst::getCastOpcode(AI, true,
+ Type::getInt32Ty(Context), true);
+ InitCall->setArgOperand(0,
+ CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+ "argc.cast", InitCall));
+ } else {
+ AI->replaceAllUsesWith(InitCall);
+ InitCall->setArgOperand(0, AI);
+ }
+
+ case 0: break;
+ }
+}
+
+void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray, bool beginning) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BB->getTerminator();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ LLVMContext &Context = BB->getContext();
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
+ Constant *ElementPtr =
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ "NewFuncCounter", InsertPos);
+ new StoreInst(NewVal, ElementPtr, InsertPos);
+}
+
+void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
+ // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
+ // types.
+ Type *GlobalDtorElems[2] = {
+ Type::getInt32Ty(Mod->getContext()),
+ FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
+ };
+ const StructType *GlobalDtorElemTy =
+ StructType::get(Mod->getContext(), GlobalDtorElems, false);
+
+ // Construct the new element we'll be adding.
+ Constant *Elem[2] = {
+ ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
+ ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
+ };
+
+ // If llvm.global_dtors exists, make a copy of the things in its list and
+ // delete it, to replace it with one that has a larger array type.
+ std::vector<Constant *> dtors;
+ if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
+ if (ConstantArray *InitList =
+ dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
+ for (unsigned i = 0, e = InitList->getType()->getNumElements();
+ i != e; ++i)
+ dtors.push_back(cast<Constant>(InitList->getOperand(i)));
+ }
+ GlobalDtors->eraseFromParent();
+ }
+
+ // Build up llvm.global_dtors with our new item in it.
+ GlobalVariable *GlobalDtors = new GlobalVariable(
+ *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
+ GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
+
+ dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
+ GlobalDtors->setInitializer(ConstantArray::get(
+ cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
new file mode 100644
index 000000000000..09b22171ff04
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -0,0 +1,36 @@
+//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILINGUTILS_H
+#define PROFILINGUTILS_H
+
+namespace llvm {
+ class BasicBlock;
+ class Function;
+ class GlobalValue;
+ class Module;
+ class PointerType;
+
+ void InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Arr = 0,
+ PointerType *arrayType = 0);
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray,
+ bool beginning = true);
+ void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 000000000000..a5adb5e7cefe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,97 @@
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
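+//
+// A hedged, C-like sketch of the kind of code this catches (not taken from
+// this pass):
+//
+//   int sum = 0;
+//   for (int i = 0; i != n; ++i)
+//     sum += a[i];          // sum is never read afterwards
+//   return 0;
+//
+// The additions into sum form a cycle (each one feeds the next iteration), so
+// a use-count based DCE keeps them all.  Seeding liveness only from
+// terminators, debug intrinsics, and instructions that may have side effects,
+// and assuming everything else dead, lets this pass delete the whole chain
+// while the loop control stays live through the branch.
+//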
+
+#define DEBUG_TYPE "adce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+ struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
+ }
+
+ };
+}
+
+char ADCE::ID = 0;
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
+
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ I->mayHaveSideEffects()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.pop_back_val();
+
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+ for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ ++NumRemoved;
+ (*I)->eraseFromParent();
+ }
+
+ return !worklist.empty();
+}
+
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
+}
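
The liveness propagation above can be read in isolation as a small mark-and-sweep over a dependence graph. The following standalone sketch (plain C++, not LLVM code, with made-up node names) mirrors the same optimistic assumption: nothing is live until a root, such as a terminator or a side-effecting instruction, proves it so. That is what lets a self-feeding dead loop value disappear.

#include <cstdio>
#include <set>
#include <vector>

struct Node {
  const char *Name;
  bool IsRoot;                  // stands in for "terminator or side effects"
  std::vector<Node*> Operands;  // values this node depends on
  Node(const char *N, bool R) : Name(N), IsRoot(R) {}
};

// Optimistic liveness: only roots are live up front; liveness then flows
// backwards over operand edges, just like the worklist loop in ADCE above.
static void markLive(const std::vector<Node*> &Roots, std::set<Node*> &Alive) {
  std::vector<Node*> Worklist(Roots.begin(), Roots.end());
  Alive.insert(Roots.begin(), Roots.end());
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (size_t i = 0; i != N->Operands.size(); ++i)
      if (Alive.insert(N->Operands[i]).second)   // newly proven live
        Worklist.push_back(N->Operands[i]);
  }
}

int main() {
  Node A("a = load", false);
  Node B("b = a + 1", false);              B.Operands.push_back(&A);
  Node C("c = dead loop counter", false);  C.Operands.push_back(&C);
  Node R("ret b", true);                   R.Operands.push_back(&B);
  Node *All[] = { &A, &B, &C, &R };

  std::vector<Node*> Roots;
  for (size_t i = 0; i != 4; ++i)
    if (All[i]->IsRoot)
      Roots.push_back(All[i]);

  std::set<Node*> Alive;
  markLive(Roots, Alive);

  for (size_t i = 0; i != 4; ++i)
    if (!Alive.count(All[i]))
      std::printf("remove: %s\n", All[i]->Name);  // only the self-feeding "c"
  return 0;
}
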
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
new file mode 100644
index 000000000000..cee550265622
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,152 @@
+//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a very simple profile guided basic block placement
+// algorithm. The idea is to put frequently executed blocks together at the
+// start of the function, and hopefully increase the number of fall-through
+// conditional branches. If there is no profile information for a particular
+// function, this pass basically orders blocks in depth-first order.
+//
+// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+// Positioning" by Pettis and Hansen, except that it uses basic block counts
+// instead of edge counts. This should be improved in many ways, but is very
+// simple for now.
+//
+// Basically we "place" the entry block, then loop over all successors in a DFO,
+// placing the most frequently executed successor until we run out of blocks. I
+// told you this was _extremely_ simplistic. :) This is also much slower than it
+// could be. When it becomes important, this pass will be rewritten to use a
+// better algorithm, and then we can worry about efficiency.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Scalar.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumMoved, "Number of basic blocks moved");
+
+namespace {
+ struct BlockPlacement : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+}
+
+char BlockPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+
+FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
+
+bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+}
+
+
+/// PlaceBlocks - Recursively place the specified blocks and any unplaced
+/// successors.
+void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ double MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ double Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+}
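
The greedy loop in PlaceBlocks above amounts to: place a block, then repeatedly pull in its hottest not-yet-placed successor. Below is a standalone sketch of that idea on a toy CFG (plain C++; the block names and counts are made up and stand in for ProfileInfo::getExecutionCount(), and the real pass splices blocks inside the Function instead of building a layout vector).

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::map<std::string, std::vector<std::string> > SuccMap;
typedef std::map<std::string, double> CountMap;   // stand-in for ProfileInfo

// Place BB, then keep placing its most frequently executed unplaced successor.
static void place(const std::string &BB, const SuccMap &Succs,
                  const CountMap &Count, std::set<std::string> &Placed,
                  std::vector<std::string> &Layout) {
  Placed.insert(BB);
  Layout.push_back(BB);
  while (true) {
    const std::vector<std::string> &S = Succs.find(BB)->second;
    std::string Best;
    double BestCount = -1.0;
    for (size_t i = 0; i != S.size(); ++i)
      if (!Placed.count(S[i]) && Count.find(S[i])->second > BestCount) {
        Best = S[i];
        BestCount = Count.find(S[i])->second;
      }
    if (Best.empty()) return;          // no unplaced successors left
    place(Best, Succs, Count, Placed, Layout);
  }
}

int main() {
  SuccMap Succs;
  Succs["entry"].push_back("cold");
  Succs["entry"].push_back("hot");
  Succs["hot"].push_back("exit");
  Succs["cold"].push_back("exit");
  Succs["exit"];                       // ensure an (empty) successor entry

  CountMap Count;
  Count["entry"] = 100; Count["hot"] = 95; Count["cold"] = 5; Count["exit"] = 100;

  std::set<std::string> Placed;
  std::vector<std::string> Layout;
  place("entry", Succs, Count, Placed, Layout);

  for (size_t i = 0; i != Layout.size(); ++i)
    std::printf("%s ", Layout[i].c_str());   // entry hot exit cold
  std::printf("\n");
  return 0;
}
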
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..0af14ed17bf6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,1161 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+namespace {
+ class CodeGenPrepare : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+ DominatorTree *DT;
+ ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
+
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
+ /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
+ /// be updated.
+ bool ModifiedDT;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ProfileInfo>();
+ }
+
+ private:
+ bool EliminateMostlyEmptyBlocks(Function &F);
+ bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void EliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool OptimizeBlock(BasicBlock &BB);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
+ bool OptimizeCallInst(CallInst *CI);
+ bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeExtUses(Instruction *I);
+ bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
+ return new CodeGenPrepare(TLI);
+}
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+
+ ModifiedDT = false;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ PFI = getAnalysisIfAvailable<ProfileInfo>();
+
+ // First pass, eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ MadeChange |= OptimizeBlock(*BB);
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ SunkAddrs.clear();
+
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= ConstantFoldTerminator(BB, true);
+
+ if (MadeChange)
+ ModifiedDT = true;
+ EverMadeChange |= MadeChange;
+ }
+
+ if (ModifiedDT && DT)
+ DT->DT->recalculate(F);
+
+ return EverMadeChange;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split them the way we
+/// want them.
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI;
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!CanMergeBlocks(BB, DestBB))
+ continue;
+
+ EliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+  // the successor.  If there are more complex conditions (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != DestBB || !isa<PHINode>(User))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (User->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(User))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ if (DT && !ModifiedDT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
+ if (PFI) {
+ PFI->replaceAllUses(BB, DestBB);
+ PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ }
+ BB->eraseFromParent();
+ ++NumBlocksElim;
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(CI->getType());
+
+  // If this is an fp<->int conversion, it is not a noop copy.
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(UI);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCast =
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ ++NumCastUses;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ ++NumCmpUses;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return MadeChange;
+}
+
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+ void replaceCall(Value *With) {
+ CI->replaceAllUsesWith(With);
+ CI->eraseFromParent();
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
+ return false;
+ }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
+ // Lower all uses of llvm.objectsize.*
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ const Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ ModifiedDT ? 0 : DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+
+ // From here on out we're working with named functions.
+ if (CI->getCalledFunction() == 0) return false;
+
+  // If llvm.dbg.value is far away from the value, then iSel may not be able
+  // to handle it properly.  iSel will drop llvm.dbg.value if it cannot
+  // find a node corresponding to the value.
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
+ if (!VI->isTerminator() &&
+ (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstNonPHI());
+ else
+ DVI->insertAfter(VI);
+ return true;
+ }
+
+ // We'll need TargetData from here on out.
+ const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+ if (!TD) return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // that have the default "don't know" as the objectsize. Anything else
+ // should be left alone.
+ CodeGenPrepareFortifiedLibCalls Simplifier;
+ return Simplifier.fold(CI, TD);
+}
+
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+ if (!TLI)
+ return false;
+
+ Value *V = RI->getReturnValue();
+ PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
+ if (V && !PN)
+ return false;
+
+ BasicBlock *BB = RI->getParent();
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between the PHI and return, or that the
+ // return is the first instruction in the block.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI))
+ continue;
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE)
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+/// IsNonLocalValue - Return true if the specified value is defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// OptimizeMemoryInst - Load and Store Instructions often have
+/// addressing modes that can do significant amounts of computation. As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program. The problem is that isel can only
+/// see within a single block. As such, we sink as much legal addressing mode
+/// stuff into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ const Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (Visited.count(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ Visited.insert(V);
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+ AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
+  // Check to see if any of the instructions subsumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+  // scanning from the top of the BB to the bottom, reuses of the expr are
+ // guaranteed to happen later.
+ BasicBlock::iterator InsertPt = MemoryInst;
+
+  // Now that we've determined the addressing expression we want to use and
+  // know that we have to sink it into this block, check to see if we have
+  // already done this for some other load/store instr in this block.  If so,
+  // reuse the computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ } else {
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ const Type *IntPtrTy =
+ TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
+
+ Value *Result = 0;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ if (V->getType() != IntPtrTy)
+ V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
+ "sunkaddr", InsertPt);
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else {
+ V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ }
+ if (AddrMode.Scale != 1)
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
+ AddrMode.Scale),
+ "sunkaddr", InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
+ InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ if (Result == 0)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ }
+
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
+ RecursivelyDeleteTriviallyDeadInstructions(Repl);
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
+ }
+ ++NumMemoryInsts;
+ return true;
+}
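// Illustrative only (not taken from this file): a rough before/after of what
// OptimizeMemoryInst above does when a load's address is computed in another
// block.  The block and value names are made up, and the exact decomposition
// depends on what AddressingModeMatcher accepts for the target.
//
//   Before:
//     bb1:
//       %addr = getelementptr i32* %p, i64 %i
//       br label %bb2
//     bb2:
//       %v = load i32* %addr
//
//   After (the address is rematerialized as integer math next to the load, so
//   instruction selection can fold it into the memory operand):
//     bb2:
//       %sunkaddr = ptrtoint i32* %p to i64
//       %sunkaddr1 = mul i64 %i, 4
//       %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
//       %sunkaddr3 = inttoptr i64 %sunkaddr2 to i32*
//       %v = load i32* %sunkaddr3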
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computing into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+ bool MadeChange = false;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
+ }
+
+ return MadeChange;
+}
+
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+ // Look for a load being extended.
+ LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI) return false;
+
+ // If they're already in the same block, there's nothing to do.
+ if (LI->getParent() == I->getParent())
+ return false;
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() &&
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
+ return false;
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ return false;
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ I->removeFromParent();
+ I->insertAfter(LI);
+ ++NumExtsMoved;
+ return true;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
+  // other uses of the source with the result of the extension.
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+  // Make sure none of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+ // InsertedTruncs - Only insert one trunc in each block once.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+ ++NumExtUses;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ return DupRetToEnableTailCallOpts(RI);
+
+ return false;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ CurInstIterator = BB.begin();
+ for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ MadeChange |= OptimizeInst(CurInstIterator++);
+
+ return MadeChange;
+}
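
As a usage sketch, the pass is intended to run right before instruction selection, with the target's TargetLowering supplied so the profitability checks above have something to consult. The driver fragment below is hypothetical: the FunctionPassManager/TargetMachine wiring is assumed era-appropriate API rather than taken from this commit, and passing a null TargetLowering is also valid and simply disables the target-dependent transforms.

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

// Hypothetical hook called while setting up the pre-isel pipeline.
static void addPreISelPasses(FunctionPassManager &FPM, TargetMachine *TM) {
  const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
  // One pass covers the block merging, cast/cmp sinking, addressing-mode
  // sinking and ret duplication implemented above.
  FPM.add(createCodeGenPreparePass(TLI));
}
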
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
new file mode 100644
index 000000000000..664c3f6a222f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,91 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+//    * Converts instructions like "add i32 1, 2" into 3
+//
+// Notice that:
+// * This pass has a habit of making definitions be dead. It is a good idea
+// to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+ struct ConstantPropagation : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char ConstantPropagation::ID = 0;
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+
+FunctionPass *llvm::createConstantPropagationPass() {
+ return new ConstantPropagation();
+}
+
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+ // Initialize the worklist to all of the instructions ready to process...
+ std::set<Instruction*> WorkList;
+ for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ WorkList.insert(&*i);
+ }
+ bool Changed = false;
+
+ while (!WorkList.empty()) {
+ Instruction *I = *WorkList.begin();
+ WorkList.erase(WorkList.begin()); // Get an element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ WorkList.insert(cast<Instruction>(*UI));
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ // Remove the dead instruction.
+ WorkList.erase(I);
+ I->eraseFromParent();
+
+ // We made a change to the function...
+ Changed = true;
+ ++NumInstKilled;
+ }
+ }
+ return Changed;
+}
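
The note above about leaving dead definitions behind translates directly into pass scheduling: constprop rewrites the users of a foldable instruction but may leave that instruction's operands dead, so a dead-instruction sweep is typically queued right after it. A hypothetical pipeline fragment using only the factory functions declared in llvm/Transforms/Scalar.h (the PassManager wiring itself is assumed, not taken from this commit):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static void addConstantFolding(PassManager &PM) {
  PM.add(createConstantPropagationPass());   // fold "add i32 1, 2" and the like
  PM.add(createDeadInstEliminationPass());   // sweep up the now-dead operands
}
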
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 000000000000..e275268fc4ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,207 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) {
+ initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
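
The rule in processCmp above, fold a comparison only when every incoming edge yields the same known verdict, can be stated in a few lines. A standalone sketch follows (plain C++, not LazyValueInfo itself; the Tristate enum only mirrors LazyValueInfo::Tristate in spirit, and the edge verdicts are made up).

#include <cstdio>
#include <vector>

enum Tristate { False = 0, True = 1, Unknown = -1 };   // like LVI's Tristate

// Fold the compare only when every incoming edge yields the same known answer.
static Tristate foldAcrossEdges(const std::vector<Tristate> &EdgeVerdicts) {
  if (EdgeVerdicts.empty()) return Unknown;
  Tristate Result = EdgeVerdicts[0];
  if (Result == Unknown) return Unknown;
  for (size_t i = 1, e = EdgeVerdicts.size(); i != e; ++i)
    if (EdgeVerdicts[i] != Result) return Unknown;
  return Result;
}

int main() {
  std::vector<Tristate> Agree;   // both predecessors prove the compare true
  Agree.push_back(True); Agree.push_back(True);
  std::vector<Tristate> Mixed;   // one edge can't decide: leave the compare alone
  Mixed.push_back(True); Mixed.push_back(Unknown);

  std::printf("%d %d\n", foldAcrossEdges(Agree), foldAcrossEdges(Mixed));  // 1 -1
  return 0;
}
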
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 000000000000..8dbcc23d7ec8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,135 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead. Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadInstElimination pass implementation
+ //
+ struct DeadInstElimination : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+ Instruction *Inst = DI++;
+ if (isInstructionTriviallyDead(Inst)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ ++DIEEliminated;
+ }
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DeadInstElimination::ID = 0;
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false)
+
+Pass *llvm::createDeadInstEliminationPass() {
+ return new DeadInstElimination();
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadCodeElimination pass implementation
+ //
+ struct DCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DCE::ID = 0;
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+
+bool DCE::runOnFunction(Function &F) {
+ // Start out with all of the instructions in the worklist...
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ WorkList.push_back(&*i);
+
+ // Loop over the worklist, finding instructions that are dead. If they are
+ // dead, make them drop all of their uses, making other instructions
+ // potentially dead, and work until the worklist is empty.
+ //
+ bool MadeChange = false;
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ // Loop over all of the values that the instruction uses; if any of them
+ // are instructions, add them to the worklist because they might become
+ // dead after this one is removed.
+ //
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *Used = dyn_cast<Instruction>(*OI))
+ WorkList.push_back(Used);
+
+ // Remove the instruction.
+ I->eraseFromParent();
+
+ // Remove the instruction from the worklist if it still exists in it.
+ for (std::vector<Instruction*>::iterator WI = WorkList.begin();
+ WI != WorkList.end(); ) {
+ if (*WI == I)
+ WI = WorkList.erase(WI);
+ else
+ ++WI;
+ }
+
+ MadeChange = true;
+ ++DCEEliminated;
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCE();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
new file mode 100644
index 000000000000..cb9b5bebc5c7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -0,0 +1,727 @@
+//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a trivial dead store elimination that only considers
+// basic-block local redundant stores.
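+//
+// For example (illustrative IR): the first store below is overwritten before
+// %p can be read again, so it is deleted:
+//
+//   store i32 0, i32* %p
+//   store i32 1, i32* %p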
+//
+// FIXME: This should eventually be extended to be a post-dominator tree
+// traversal. Doing so would be pretty trivial.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumFastStores, "Number of stores deleted");
+STATISTIC(NumFastOther , "Number of other instrs removed");
+
+namespace {
+ struct DSE : public FunctionPass {
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+
+ static char ID; // Pass identification, replacement for typeid
+ DSE() : FunctionPass(ID), AA(0), MD(0) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT.isReachableFromEntry(I))
+ Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0;
+ return Changed;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB);
+ bool HandleFree(CallInst *F);
+ bool handleEndBlock(BasicBlock &BB);
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+ };
+}
+
+char DSE::ID = 0;
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
+
+FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // memset/memcpy, which write more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// are unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile stores.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because one may be followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
+}
+
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getDest();
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
+}
+
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+ const TargetData *TD = AA.getTargetData();
+ if (TD == 0)
+ return AliasAnalysis::UnknownSize;
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ return AliasAnalysis::UnknownSize;
+ }
+
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+}
+
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isArrayAllocation();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->mayBeOverridden();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no TargetData information around, then the size of the store
+ // is inferrable from the pointee type. If both pointers have the same
+ // pointee type, the later store completely overwrites the earlier one.
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
+ }
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
+ }
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const TargetData &TD = *AA.getTargetData();
+
+ const Value *UO1 = GetUnderlyingObject(P1, &TD),
+ *UO2 = GetUnderlyingObject(P2, &TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return false;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ if (isObjectPointerWithTrustworthySize(UO2)) {
+ uint64_t ObjectSize =
+ TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+ if (ObjectSize == Later.Size)
+ return true;
+ }
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ int64_t EarlierOff = 0, LaterOff = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return false;
+
+ // The later store completely overlaps the earlier store if:
+ //
+ // 1. Both start at the same offset and the later one's size is greater than
+ // or equal to the earlier one's, or
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // 2. The earlier store has an offset greater than the later offset, but which
+ // still lies completely within the later store.
+ //
+ // |--earlier--|
+ // |----- later ------|
+ //
+ // We have to be careful here as *Off is signed while *.Size is unsigned.
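+ // For example (illustrative numbers): if the earlier store covers bytes
+ // [4, 8) (EarlierOff = 4, Size = 4) and the later store covers bytes
+ // [0, 16) (LaterOff = 0, Size = 16), then (4 - 0) + 4 = 8 <= 16 and the
+ // earlier store is completely overwritten.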
+ if (EarlierOff >= LaterOff &&
+ uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ return true;
+
+ // Otherwise, they don't completely overlap.
+ return false;
+}
+
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
+///
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
+///
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
+
+ // Okay, 'Inst' may copy over itself. However, we can still remove the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+ // aliases, so removing the first memcpy is safe (assuming it writes <= the
+ // number of bytes written by the second one).
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
+
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Do a top-down walk on the BB.
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ Instruction *Inst = BBI++;
+
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= HandleFree(F);
+ continue;
+ }
+
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst))
+ continue;
+
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal() || InstDep.isUnknown())
+ continue;
+
+ // If we're storing the same value back to a pointer that we just
+ // loaded from, then the store can be removed.
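+ // For example (illustrative IR):
+ //   %v = load i32* %p
+ //   store i32 %v, i32* %p   ; a no-op store, so it is removed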
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI, *MD);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+ // away the store and we bail out. However, if we depend on something
+ // that overwrites the memory location, we *can* potentially optimize it.
+ //
+ // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases; reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ }
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB);
+
+ return MadeChange;
+}
+
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
+ MemDepResult Dep = MD->getDependency(F);
+
+ while (!Dep.isNonLocal() && !Dep.isUnknown()) {
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ return MadeChange;
+
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
+
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ return MadeChange;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getDependency(F);
+ }
+
+ return MadeChange;
+}
+
+/// handleEndBlock - Remove dead stores to stack-allocated locations in the
+/// function end block. Ex:
+/// %A = alloca i32
+/// ...
+/// store i32 1, i32* %A
+/// ret void
+bool DSE::handleEndBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallPtrSet<Value*, 16> DeadStackObjects;
+
+ // Find all of the alloca'd pointers in the entry block.
+ BasicBlock *Entry = BB.getParent()->begin();
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ DeadStackObjects.insert(AI);
+
+ // Treat byval arguments the same, stores to them are dead at the end of the
+ // function.
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+ AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+ if (AI->hasByValAttr())
+ DeadStackObjects.insert(AI);
+
+ // Scan the basic block backwards
+ for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ) {
+ --BBI;
+
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+ // Stores to stack values are valid candidates for removal.
+ if (DeadStackObjects.count(Pointer)) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Object: " << *Pointer << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ DeadStackObjects.erase(A);
+ continue;
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
+ continue;
+
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ SmallVector<Value*, 8> LiveAllocas;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the call site touches it.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
+
+ if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
+ LiveAllocas.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.erase(*I);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ return MadeChange;
+
+ continue;
+ }
+
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else {
+ // Not a loading instruction.
+ continue;
+ }
+
+ // Remove any allocas from the DeadStackObjects set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
+ }
+
+ return MadeChange;
+}
+
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
+
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
+
+ SmallVector<Value*, 16> NowLive;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+ if (!AA->isNoAlias(StackLoc, LoadedLoc))
+ NowLive.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
+ I != E; ++I)
+ DeadStackObjects.erase(*I);
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 000000000000..3d3f17b26fc6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
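+//
+// For example (illustrative IR), a repeated pure expression that is dominated
+// by an identical one is folded onto the earlier value:
+//
+//   %a = add i32 %x, %y
+//   %b = add i32 %x, %y    ; all uses of %b are replaced with %a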
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ Res ^= getHash(Inst->getOperand(i)) << i;
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ Res ^= getHash(CI->getType());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+ Res ^= CI->getPredicate();
+ else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+ for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+ E = EVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+ for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+ E = IVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else {
+ // nothing extra to hash in.
+ assert((isa<CallInst>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << i;
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const TargetData *TD;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this table: if so, we replace them
+ /// with what we find; otherwise we insert them so that dominated values can
+ /// succeed in their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
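+ /// For example (illustrative): a load of %p recorded at generation 0 can
+ /// satisfy a later load of %p only while CurrentGeneration is still 0; any
+ /// instruction that may write memory increments the generation, so loads
+ /// separated by such an instruction are not CSE'd with each other.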
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the current values
+ /// of read-only call values. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ bool processNode(DomTreeNode *Node);
+
+ // This transformation requires dominator tree info.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ // Define a scope in the scoped hash table. When we are done processing this
+ // domtree node and recurse back up to our parent domtree node, this will pop
+ // off all the values we install.
+ ScopedHTType::ScopeTy Scope(*AvailableValues);
+
+ // Define a scope for the load values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+
+ // Define a scope for the call values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ CallHTType::ScopeTy CallScope(*AvailableCalls);
+
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live-out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent block. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+ /// as long as there is no instruction that reads memory. If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (LI->isVolatile()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (!SI->isVolatile())
+ LastStore = SI;
+ }
+ }
+ }
+
+ unsigned LiveOutGeneration = CurrentGeneration;
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+ Changed |= processNode(*I);
+ // Pop any generation changes off the stack from the recursive walk.
+ CurrentGeneration = LiveOutGeneration;
+ }
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ return processNode(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
new file mode 100644
index 000000000000..87b7317ad2dd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -0,0 +1,2287 @@
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. It also performs simple dead load elimination.
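+//
+// For example (illustrative IR), a load that is fully redundant with a
+// dominating store to the same pointer is forwarded and removed:
+//
+//   store i32 %v, i32* %p
+//   %w = load i32* %p      ; %w is replaced by %v and the load is deleted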
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct Expression {
+ uint32_t opcode;
+ const Type *type;
+ SmallVector<uint32_t, 4> varargs;
+
+ Expression(uint32_t o = ~2U) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ if (opcode == ~0U || opcode == ~1U)
+ return true;
+ if (type != other.type)
+ return false;
+ if (varargs != other.varargs)
+ return false;
+ return true;
+ }
+ };
+
+ class ValueTable {
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ uint32_t nextValueNumber;
+
+ Expression create_expression(Instruction* I);
+ Expression create_extractvalue_expression(ExtractValueInst* EI);
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return ~0U;
+ }
+
+ static inline Expression getTombstoneKey() {
+ return ~1U;
+ }
+
+ static unsigned getHashValue(const Expression e) {
+ unsigned hash = e.opcode;
+
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9));
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+
+Expression ValueTable::create_expression(Instruction *I) {
+ Expression e;
+ e.type = I->getType();
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
+ return e;
+}
+
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+ assert(EI != 0 && "Not an ExtractValueInst?");
+ Expression e;
+ e.type = EI->getType();
+ e.opcode = 0;
+
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is, we'll synthesize a semantically equivalent expression instead of
+ // an extract value expression.
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ e.opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ e.opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ e.opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (e.opcode != 0) {
+ // Intrinsic recognized. Grab its args to finish building the expression.
+ assert(I->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ return e;
+ }
+ }
+
+ // Not a recognised intrinsic. Fall back to producing an extract value
+ // expression.
+ e.opcode = EI->getOpcode();
+ for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value *V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+
+ MemDepResult local_dep = MD->getDependency(C);
+
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering[C] = v;
+ return v;
+ }
+
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const NonLocalDepEntry *I = &deps[i];
+ if (I->getResult().isNonLocal())
+ continue;
+
+ // We don't handle non-definitions. If we already have a call, reject
+ // instruction dependencies.
+ if (!I->getResult().isDef() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
+
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
+
+ cdep = 0;
+ break;
+ }
+
+ if (!cdep) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering[C] = v;
+ return v;
+
+ } else {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+}
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value *V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or :
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = create_expression(I);
+ break;
+ case Instruction::ExtractValue:
+ exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
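+  // DenseMap's operator[] default-constructs missing entries to 0, so a zero
+  // here means this expression has not been numbered yet; give it a fresh
+  // value number.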
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+ return e;
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value *V) const {
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+}
+
+/// clear - Remove all entries from the ValueTable.
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
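+  // Value number 0 is reserved to mean "not yet numbered", so restart at 1.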
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering.
+void ValueTable::erase(Value *V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+ for (DenseMap<Value*, uint32_t>::const_iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class GVN : public FunctionPass {
+ bool NoLoads;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+ const TargetData *TD;
+
+ ValueTable VN;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
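+    /// InstrsToErase - Instructions marked for deletion by
+    /// markInstructionForDeletion; processBlock erases them once it is safe
+    /// to update its iterator.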
+ SmallVector<Instruction*, 8> InstrsToErase;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ /// markInstructionForDeletion - This removes the specified instruction from
+ /// our various maps and marks it for deletion.
+ void markInstructionForDeletion(Instruction *I) {
+ VN.erase(I);
+ InstrsToErase.push_back(I);
+ }
+
+ const TargetData *getTargetData() const { return TD; }
+ DominatorTree &getDominatorTree() const { return *DT; }
+ AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+ MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+ private:
+ /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
+ /// its value number.
+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry &Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given value if encountered.
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != V || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
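+      // The head entry lives directly in the DenseMap, so it cannot simply be
+      // unlinked; either clear it or copy the next entry into it.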
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
+
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
+    // This transformation requires dominator information
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+
+    // Helper functions
+ // FIXME: eliminate or document these better
+ bool processLoad(LoadInst *L);
+ bool processInstruction(Instruction *I);
+ bool processNonLocalLoad(LoadInst *L);
+ bool processBlock(BasicBlock *BB);
+ void dump(DenseMap<uint32_t, Value*> &d);
+ bool iterateOnFunction(Function &F);
+ bool performPRE(Function &F);
+ Value *findLeader(BasicBlock *BB, uint32_t num);
+ void cleanupGlobalSets();
+ void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
+ };
+
+ char GVN::ID = 0;
+}
+
+// createGVNPass - The public interface to this file...
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
+}
+
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+ errs() << "{\n";
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
+ E = d.end(); I != E; ++I) {
+ errs() << I->first << "\n";
+ I->second->dump();
+ }
+ errs() << "}\n";
+}
+
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
+/// map records one of the following states for each block:
+/// 0) we know the block *is not* fully available.
+/// 1) we know the block *is* fully available.
+/// 2) we do not know whether the block is fully available or not, but we are
+/// currently speculating that it will be.
+/// 3) we are speculating for this block and have used that to speculate for
+/// other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second) {
+ // If this is a speculative "available" value, mark it as being used for
+ // speculation of other blocks.
+ if (IV.first->second == 2)
+ IV.first->second = 3;
+ return IV.first->second != 0;
+ }
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ goto SpeculationFailure;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ goto SpeculationFailure;
+
+ return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block. We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+ char &BBVal = FullyAvailableBlocks[BB];
+
+ // If we didn't speculate on this, just return with it set to false.
+ if (BBVal == 2) {
+ BBVal = 0;
+ return false;
+ }
+
+ // If we did speculate on this value, we could have blocks set to 1 that are
+ // incorrect. Walk the (transitive) successors of this block and mark them as
+ // 0 if set to one.
+ SmallVector<BasicBlock*, 32> BBWorklist;
+ BBWorklist.push_back(BB);
+
+ do {
+ BasicBlock *Entry = BBWorklist.pop_back_val();
+ // Note that this sets blocks to 0 (unavailable) if they happen to not
+ // already be in FullyAvailableBlocks. This is safe.
+ char &EntryVal = FullyAvailableBlocks[Entry];
+ if (EntryVal == 0) continue; // Already unavailable.
+
+ // Mark as unavailable.
+ EntryVal = 0;
+
+ for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+ BBWorklist.push_back(*I);
+ } while (!BBWorklist.empty());
+
+ return false;
+}
+
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+ const Type *LoadTy,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (TD.getTypeSizeInBits(StoredVal->getType()) <
+ TD.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+ const Type *LoadedTy,
+ Instruction *InsertPt,
+ const TargetData &TD) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ return 0;
+
+ // If this is already the right type, just return it.
+ const Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoreSize == LoadSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+ return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ const Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->isPointerTy())
+ TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->isPointerTy())
+ StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+
+ return StoredVal;
+ }
+
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (TD.isBigEndian()) {
+ Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+ StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ }
+
+ // Truncate the integer to the right size now.
+ const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+
+ if (LoadedTy == NewIntTy)
+ return StoredVal;
+
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->isPointerTy())
+ return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+
+ // Otherwise, bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove). This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const TargetData &TD) {
+  // If the loaded or stored value is a first-class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+#if 0
+ if (LoadOffset == StoreOffset) {
+ dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+ }
+#endif
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+
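+  // We can only reason about whole bytes; give up if either size is not a
+  // multiple of 8 bits.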
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure) {
+#if 0
+ dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+#endif
+ return -1;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset-StoreOffset;
+}
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI,
+ const TargetData &TD) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ StorePtr, StoreSize, TD);
+}
+
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+ LoadInst *DepLI, const TargetData &TD){
+  // Cannot handle forwarding from a load of a first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+  // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
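+/// AnalyzeLoadFromClobberingMemInst - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by a mem intrinsic
+/// (memset, memcpy, memmove). Returns -1 if nothing can be done, or the byte
+/// offset into the written memory that feeds the load.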
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI,
+ const TargetData &TD) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (SizeCst == 0) return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+  // If this is a memset, we just need to see if the offset is valid within the
+  // size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, TD);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (Src == 0) return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
+ if (GV == 0 || !GV->isConstant()) return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ MI->getDest(), MemSizeInBits, TD);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ return Offset;
+ return -1;
+}
+
+
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store provides bits used by the load but the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+ const Type *LoadTy,
+ Instruction *InsertPt, const TargetData &TD){
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+ "tmp");
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (TD.isLittleEndian())
+ ShiftAmt = Offset*8;
+ else
+ ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+
+ if (ShiftAmt)
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+
+ if (LoadSize != StoreSize)
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+ "tmp");
+
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+/// GetLoadValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load. This means
+/// that the clobbering load *may* provide bits used by this load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ GVN &gvn) {
+ const TargetData &TD = *gvn.getTargetData();
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+ assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    // If we have a load/load clobber and SrcVal can be widened to cover this
+    // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ const Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+    // but then all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ const TargetData &TD){
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet*2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal = Builder.CreateShl(Val, 1*8);
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+namespace {
+
+struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+ }
+};
+
+} // end anonymous namespace
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI. This returns the value
+/// that should be used at LI's definition site.
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+ GVN &gvn) {
+ // Check for the fully redundant, dominating load case. In this case, we can
+ // just use the dominating value directly.
+ if (ValuesPerBlock.size() == 1 &&
+ gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+ LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+
+ // Otherwise, we have to construct SSA form.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSAUpdate(&NewPHIs);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
+
+ const Type *LoadTy = LI->getType();
+
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ BasicBlock *BB = AV.BB;
+
+ if (SSAUpdate.HasValueForBlock(BB))
+ continue;
+
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ }
+
+ // Perform PHI construction.
+ Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+
+ // If new PHI nodes were created, notify alias analysis.
+ if (V->getType()->isPointerTy()) {
+ AliasAnalysis *AA = gvn.getAliasAnalysis();
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ AA->addEscapingUse(P->getOperandUse(jj));
+ }
+ }
+ }
+
+ return V;
+}
+
+static bool isLifetimeStart(const Instruction *Inst) {
+ if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Find the non-local dependencies of the load.
+ SmallVector<NonLocalDepResult, 64> Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+ //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
+ // << Deps.size() << *LI << '\n');
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ if (Deps.size() > 100)
+ return false;
+
+  // If we had a phi translation failure, we'll have a single entry which is an
+  // unknown dependence in the current block. Reject this early.
+ if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
+ SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ BasicBlock *DepBB = Deps[i].getBB();
+ MemDepResult DepInfo = Deps[i].getResult();
+
+ if (DepInfo.isUnknown()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ if (DepInfo.isClobber()) {
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+ DepSI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ DepSI->getValueOperand(),
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI != LI && Address && TD) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+ LI->getPointerOperand(),
+ DepLI, *TD);
+
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ assert(DepInfo.isDef() && "Expecting def here");
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to.
+ if (S->getValueOperand()->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *TD)) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ S->getValueOperand()));
+ continue;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
+ if (LD->getType() != LI->getType()) {
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
+ continue;
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ // Okay, we have *some* definitions of the value. This means that the value
+  // is available in some of our (transitive) predecessors. Let's think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ SmallPtrSet<BasicBlock *, 4> Blockers;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ Blockers.insert(UnavailableBlocks[i]);
+
+  // Let's find the first basic block with more than one predecessor. Walk backwards
+ // through predecessors if needed.
+ BasicBlock *LoadBB = LI->getParent();
+ BasicBlock *TmpBB = LoadBB;
+
+ bool isSinglePred = false;
+ bool allSingleSucc = true;
+ while (TmpBB->getSinglePredecessor()) {
+ isSinglePred = true;
+ TmpBB = TmpBB->getSinglePredecessor();
+ if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+ return false;
+ if (Blockers.count(TmpBB))
+ return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ return false;
+ }
+
+ assert(TmpBB);
+ LoadBB = TmpBB;
+
+ // FIXME: It is extremely unclear what this loop is doing, other than
+ // artificially restricting loadpre.
+ if (isSinglePred) {
+ bool isHot = false;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ if (AV.isSimpleValue())
+ // "Hot" Instruction is in some loop (because it dominates its dep.
+ // instruction).
+ if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
+ }
+
+ // We are interested only in "hot" instructions. We don't want to do any
+ // mis-optimizations here.
+ if (!isHot)
+ return false;
+ }
+
+ // Check to see how many predecessors have the loaded value fully
+ // available.
+ DenseMap<BasicBlock*, Value*> PredLoads;
+ DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
+ continue;
+ }
+ PredLoads[Pred] = 0;
+
+ if (Pred->getTerminator()->getNumSuccessors() != 1) {
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ }
+ }
+ if (!NeedToSplit.empty()) {
+ toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+ return false;
+ }
+
+ // Decide whether PRE is profitable for this load.
+ unsigned NumUnavailablePreds = PredLoads.size();
+ assert(NumUnavailablePreds != 0 &&
+ "Fully available value should be eliminated above!");
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
+ return false;
+
+ // Check if the load can safely be moved to all the unavailable predecessors.
+ bool CanDoPRE = true;
+ SmallVector<Instruction*, 8> NewInsts;
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+
+ // Do PHI translation to get its value in the predecessor if necessary. The
+ // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+
+ // If all preds have a single successor, then we know it is safe to insert
+ // the load on the pred (?!?), so we can insert code to materialize the
+ // pointer if it is not available.
+ PHITransAddr Address(LI->getPointerOperand(), TD);
+ Value *LoadPtr = 0;
+ if (allSingleSucc) {
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
+ } else {
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
+ LoadPtr = Address.getAddr();
+ }
+
+ // If we couldn't find or insert a computation of this phi translated value,
+ // we fail PRE.
+ if (LoadPtr == 0) {
+ DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ << *LI->getPointerOperand() << "\n");
+ CanDoPRE = false;
+ break;
+ }
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+        // FIXME: REEVALUATE THIS.
+ !isSafeToLoadUnconditionally(LoadPtr,
+ UnavailablePred->getTerminator(),
+ LI->getAlignment(), TD)) {
+ CanDoPRE = false;
+ break;
+ }
+
+ I->second = LoadPtr;
+ }
+
+ if (!CanDoPRE) {
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
+ return false;
+ }
+
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+ DEBUG(if (!NewInsts.empty())
+ dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ << *NewInsts.back() << '\n');
+
+ // Assign value numbers to the new instructions.
+ for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ // FIXME: We really _ought_ to insert these value numbers into their
+ // parent's availability map. However, in doing so, we risk getting into
+ // ordering issues. If a block hasn't been processed yet, we would be
+ // marking a value as AVAIL-IN, which isn't what we intend.
+ VN.lookup_or_add(NewInsts[i]);
+ }
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+ Value *LoadPtr = I->second;
+
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+
+ // Transfer DebugLoc.
+ NewLoad->setDebugLoc(LI->getDebugLoc());
+
+ // Add the newly created load.
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
+ NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
+ }
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumPRELoad;
+ return true;
+}
+
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
+bool GVN::processLoad(LoadInst *L) {
+ if (!MD)
+ return false;
+
+ if (L->isVolatile())
+ return false;
+
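+  // If the load is dead (its value is never used), just mark it for deletion.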
+ if (L->use_empty()) {
+ markInstructionForDeletion(L);
+ return true;
+ }
+
+ // ... to a pointer that has been loaded from before...
+ MemDepResult Dep = MD->getDependency(L);
+
+ // If we have a clobber and target data is around, see if this is a clobber
+ // that we can fix up through code synthesis.
+ if (Dep.isClobber() && TD) {
+ // Check to see if we have something like this:
+ // store i32 123, i32* %P
+ // %A = bitcast i32* %P to i8*
+ // %B = gep i8* %A, i32 1
+ // %C = load i8* %B
+ //
+ // We could do that by recognizing if the clobber instructions are obviously
+ // a common base + constant offset, and if the previous store (or memset)
+ // completely covers this load. This sort of thing can happen in bitfield
+ // access code.
+ Value *AvailVal = 0;
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI == L)
+ return false;
+
+ int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+ L->getPointerOperand(),
+ DepLI, *TD);
+ if (Offset != -1)
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+ // a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
+ }
+
+ if (AvailVal) {
+ DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (AvailVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(AvailVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ Instruction *I = Dep.getInst();
+ dbgs() << " is clobbered by " << *I << '\n';
+ );
+ return false;
+ }
+
+ if (Dep.isUnknown()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ assert(Dep.isDef() && "Expecting def here");
+
+ Instruction *DepInst = Dep.getInst();
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ Value *StoredVal = DepSI->getValueOperand();
+
+ // The store and load are to a must-aliased pointer, but they may not
+ // actually have the same type. See if we know how to reuse the stored
+ // value (depending on its type).
+ if (StoredVal->getType() != L->getType()) {
+ if (TD) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(StoredVal);
+ if (StoredVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(StoredVal);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ Value *AvailableVal = DepLI;
+
+ // The loads are of a must-aliased pointer, but they may not actually have
+ // the same type. See if we know how to reuse the previously loaded value
+ // (depending on its type).
+ if (DepLI->getType() != L->getType()) {
+ if (TD) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+ L, *TD);
+ if (AvailableVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(AvailableVal);
+ if (DepLI->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(DepLI);
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load really doesn't depend on anything, then we must be loading an
+  // undef value. This can happen when loading from a fresh allocation with no
+ // intervening stores, for example.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+  // If this load occurs right after a lifetime begin, then the loaded value
+  // is undefined.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
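+  // Walk the remaining entries; a constant leader is always preferred, and
+  // the first dominating non-constant value is remembered as a fallback.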
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
+ }
+
+ return Val;
+}
+
+
+/// processInstruction - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets.
+bool GVN::processInstruction(Instruction *I) {
+ // Ignore dbg info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // If the instruction can be easily simplified then do so now in preference
+ // to value numbering it. Value numbering often exposes redundancies, for
+ // example if it determines that %y is equal to %x then the instruction
+ // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ I->replaceAllUsesWith(V);
+ if (MD && V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(I);
+ return true;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (processLoad(LI))
+ return true;
+
+ unsigned Num = VN.lookup_or_add(LI);
+ addToLeaderTable(Num, LI, LI->getParent());
+ return false;
+ }
+
+  // For conditional branches, we can perform simple conditional propagation on
+ // the condition value itself.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return false;
+
+ Value *BranchCond = BI->getCondition();
+ uint32_t CondVN = VN.lookup_or_add(BranchCond);
+
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+
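+    // In a successor reached only through this branch, the condition is known
+    // to be a constant, so record that constant as a leader for the
+    // condition's value number in that block.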
+ if (TrueSucc->getSinglePredecessor())
+ addToLeaderTable(CondVN,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
+ if (FalseSucc->getSinglePredecessor())
+ addToLeaderTable(CondVN,
+ ConstantInt::getFalse(TrueSucc->getContext()),
+ FalseSucc);
+
+ return false;
+ }
+
+ // Instructions with void type don't return a value, so there's
+  // no point in trying to find redundancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
+
+  // Allocations, terminators, and PHI nodes are always uniquely numbered, so
+  // we can save time and memory by fast-failing them.
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ if (Num == NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Perform fast-path value-number based elimination of values inherited from
+ // dominators.
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
+ // Remove it!
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ markInstructionForDeletion(I);
+ return true;
+}
+
+/// runOnFunction - This is the main transformation entry point for a function.
+bool GVN::runOnFunction(Function& F) {
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setMemDep(MD);
+ VN.setDomTree(DT);
+
+ bool Changed = false;
+ bool ShouldContinue = true;
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+ BasicBlock *BB = FI++;
+
+ bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ if (removedBlock) ++NumGVNBlocks;
+
+ Changed |= removedBlock;
+ }
+
+ unsigned Iteration = 0;
+ while (ShouldContinue) {
+ DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
+ Changed |= ShouldContinue;
+ ++Iteration;
+ }
+
+ if (EnablePRE) {
+ bool PREChanged = true;
+ while (PREChanged) {
+ PREChanged = performPRE(F);
+ Changed |= PREChanged;
+ }
+ }
+ // FIXME: Should perform GVN again after PRE does something. PRE can move
+ // computations into blocks where they become fully redundant. Note that
+ // we can't do this until PRE's critical edge splitting updates memdep.
+ // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+ cleanupGlobalSets();
+
+ return Changed;
+}
+
+
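+/// processBlock - Run processInstruction over every instruction in BB,
+/// erasing any instructions that were marked for deletion along the way.
+/// Returns true if the block was changed.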
+bool GVN::processBlock(BasicBlock *BB) {
+  // FIXME: Kill off InstrsToErase by erasing eagerly in a helper function
+ // (and incrementing BI before processing an instruction).
+ assert(InstrsToErase.empty() &&
+ "We expect InstrsToErase to be empty across iterations");
+ bool ChangedFunction = false;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ ChangedFunction |= processInstruction(BI);
+ if (InstrsToErase.empty()) {
+ ++BI;
+ continue;
+ }
+
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += InstrsToErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ E = InstrsToErase.end(); I != E; ++I) {
+ DEBUG(dbgs() << "GVN removed: " << **I << '\n');
+ if (MD) MD->removeInstruction(*I);
+ (*I)->eraseFromParent();
+ DEBUG(verifyRemoved(*I));
+ }
+ InstrsToErase.clear();
+
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+ }
+
+ return ChangedFunction;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function &F) {
+ bool Changed = false;
+ DenseMap<BasicBlock*, Value*> predMap;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ Instruction *CurInst = BI++;
+
+ if (isa<AllocaInst>(CurInst) ||
+ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+ CurInst->getType()->isVoidTy() ||
+ CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+ isa<DbgInfoIntrinsic>(CurInst))
+ continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
+
+ uint32_t ValNo = VN.lookup(CurInst);
+
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
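+ // As a rough sketch: the value computed by CurInst is already available in
+ // every predecessor of CurrentBlock except one. PRE then inserts the
+ // expression into that one predecessor and merges all copies with a phi in
+ // CurrentBlock, making the computation here fully redundant.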
+ unsigned NumWith = 0;
+ unsigned NumWithout = 0;
+ BasicBlock *PREPred = 0;
+ predMap.clear();
+
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (P == CurrentBlock) {
+ NumWithout = 2;
+ break;
+ } else if (!DT->dominates(&F.getEntryBlock(), P)) {
+ NumWithout = 2;
+ break;
+ }
+
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
+ PREPred = P;
+ ++NumWithout;
+ } else if (predV == CurInst) {
+ NumWithout = 2;
+ } else {
+ predMap[P] = predV;
+ ++NumWith;
+ }
+ }
+
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (NumWithout != 1 || NumWith == 0)
+ continue;
+
+ // Don't do PRE across an indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ continue;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ continue;
+ }
+
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction *PREInstr = CurInst->clone();
+ bool success = true;
+ for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+ Value *Op = PREInstr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
+ PREInstr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ delete PREInstr;
+ DEBUG(verifyRemoved(PREInstr));
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(CurInst->getName() + ".pre");
+ PREInstr->setDebugLoc(CurInst->getDebugLoc());
+ predMap[PREPred] = PREInstr;
+ VN.add(PREInstr, ValNo);
+ ++NumGVNPRE;
+
+ // Update the availability map to include the new instruction.
+ addToLeaderTable(ValNo, PREInstr, PREPred);
+
+ // Create a PHI to make the value available in this block.
+ pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
+ PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
+ CurInst->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ Phi->addIncoming(predMap[P], P);
+ }
+
+ VN.add(Phi, ValNo);
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
+ Phi->setDebugLoc(CurInst->getDebugLoc());
+ CurInst->replaceAllUsesWith(Phi);
+ if (Phi->getType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+ ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+ }
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
+ VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
+
+ DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
+ if (MD) MD->removeInstruction(CurInst);
+ CurInst->eraseFromParent();
+ DEBUG(verifyRemoved(CurInst));
+ Changed = true;
+ }
+ }
+
+ if (splitCriticalEdges())
+ Changed = true;
+
+ return Changed;
+}
+
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ cleanupGlobalSets();
+
+ // Top-down walk of the dominator tree
+ bool Changed = false;
+#if 0
+ // Needed for value numbering with phi construction to work.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+ RE = RPOT.end(); RI != RE; ++RI)
+ Changed |= processBlock(*RI);
+#else
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI)
+ Changed |= processBlock(DI->getBlock());
+#endif
+
+ return Changed;
+}
+
+void GVN::cleanupGlobalSets() {
+ VN.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+ VN.verifyRemoved(Inst);
+
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
new file mode 100644
index 000000000000..dee3d38d72af
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,1871 @@
+//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into simpler forms suitable for subsequent
+// analysis and transformation.
+//
+// This transformation makes the following changes to each loop with an
+// identifiable induction variable:
+// 1. All loops are transformed to have a SINGLE canonical induction variable
+// which starts at zero and steps by one.
+// 2. The canonical induction variable is guaranteed to be the first PHI node
+// in the loop header block.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
+//
+// If the trip count of a loop is computable, this pass also makes the following
+// changes:
+// 1. The exit condition for the loop is canonicalized to compare the
+// induction value against the exit value. This turns loops like:
+// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
+// 2. Any use outside of the loop of an expression derived from the indvar
+// is changed to compute the derived value outside of the loop, eliminating
+// the dependence on the exit value of the induction variable. If the only
+// purpose of the loop is to compute the exit value of some derived
+// expression, this transformation will make the loop dead.
+//
+// This transformation should be followed by strength reduction after all of the
+// desired loop transformations have been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumInserted , "Number of canonical indvars added");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+ "disable-iv-rewrite", cl::Hidden,
+ cl::desc("Disable canonical induction variable rewriting"));
+
+namespace {
+ class IndVarSimplify : public LoopPass {
+ IVUsers *IU;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ TargetData *TD;
+
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+ public:
+
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ if (!DisableIVRewrite)
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ if (!DisableIVRewrite)
+ AU.addPreserved<IVUsers>();
+ AU.setPreservesCFG();
+ }
+
+ private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
+ void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
+
+ ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter);
+
+ void SinkUnusedInvariants(Loop *L);
+ };
+}
+
+char IndVarSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Induction Variable Simplification", false, false)
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplify();
+}
+
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
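+ // As a hypothetical sketch: if FromVal is "getelementptr i8* %A, i64 %i" and
+ // the expansion produces "getelementptr i8* %B, i64 %j", the two GEPs have
+ // different SCEV pointer bases (%A vs. %B), so the rewrite is rejected below.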
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ FromPtr = GEP->getPointerOperand();
+ }
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ ToPtr = GEP->getPointerOperand();
+ }
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToSInt - Convert APF to an integer, if possible.
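+/// For example, 100.0 converts exactly to 100, while 2.5 is rejected because
+/// the conversion to an integer is not exact.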
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
+ return false;
+ IntVal = UIntVal;
+ return true;
+}
+
+/// HandleFloatingPointIV - If the loop has a floating-point induction variable
+/// then insert a corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
+
+ // Check the IV increment. Reject this PN if the increment operation is not
+ // an add or the increment value cannot be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
+ }
+
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
+
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and an equality or less than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
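+ // For example (a sketch): with InitValue 0, ExitValue 10, IncValue 3 and an
+ // ICMP_NE exit test, the IV visits 0, 3, 6, 9, 12, ... and never equals 10,
+ // so the i32 IV would eventually wrap where the fp IV would not; such cases
+ // are rejected here.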
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
+ } else {
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and an equality or greater than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
+ }
+
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
+
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
+
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
+
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
+
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ if (IU)
+ IU->AddUsersIfInteresting(NewPHI);
+}
+
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had a floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
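+///
+/// For instance (a sketch), a use of "i*2" after a loop known to run exactly
+/// 10 times, with i stepping from 0 by 1, can be rewritten outside the loop as
+/// the constant 20, potentially leaving the loop itself dead.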
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Verify that the input to the pass is already in LCSSA form.
+ assert(L->isLCSSAForm(*DT));
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitBlocks[i];
+
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ // SCEV only supports integer and pointer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (!SE->isLoopInvariant(ExitValue, L))
+ continue;
+
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+
+ DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ << " LoopVal = " << *Inst << "\n");
+
+ if (!isValidRewrite(Inst, ExitVal)) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+
+ PN->setIncomingValue(i, ExitVal);
+
+ // If this instruction is dead now, delete it.
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+ if (NumPreds == 1) {
+ // Completely replace a single-pred PHI. This is safe, because the
+ // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+ // node anymore.
+ PN->replaceAllUsesWith(ExitVal);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+ }
+ if (NumPreds != 1) {
+ // Clone the PHI and delete the original one. This lets IVUsers and
+ // any other maps purge the original user from their records.
+ PHINode *NewPN = cast<PHINode>(PN->clone());
+ NewPN->takeName(PN);
+ NewPN->insertBefore(PN);
+ PN->replaceAllUsesWith(NewPN);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+}
+
+//===----------------------------------------------------------------------===//
+// Rewrite IV users based on a canonical IV.
+// To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the strength reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
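+//
+// For example, the affine addrec {0,+,4} is treated as safe below, while a
+// quadratic addrec such as {0,+,1,+,2} is rejected.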
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
+ //
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
+
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
+
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (SE->isLoopInvariant(ExitVal, L))
+ AR = ExitVal;
+ }
+
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the strength reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L, SE))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+ NewValI->setDebugLoc(User->getDebugLoc());
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ // Collect information about induction variables that are used by sign/zero
+ // extend operations. This information is recorded by CollectExtend and
+ // provides the input to WidenIV.
+ struct WideIVInfo {
+ const Type *WidestNativeType; // Widest integer type created by [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
+
+ WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ };
+}
+
+/// CollectExtend - Update information about the induction variable that is
+/// extended by this sign or zero extend operation. This is used to determine
+/// the final width of the IV before actually widening it.
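+///
+/// For example (a sketch), if an i32 IV is used by a sext to i64 and a sext to
+/// i128, and the target considers both widths legal, WidestNativeType becomes
+/// i128; a zext user with the opposite signedness would simply be ignored.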
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+ ScalarEvolution *SE, const TargetData *TD) {
+ const Type *Ty = Cast->getType();
+ uint64_t Width = SE->getTypeSizeInBits(Ty);
+ if (TD && !TD->isLegalInteger(Width))
+ return;
+
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
+ return;
+ }
+
+ // We extend the IV to satisfy the sign of its first user, arbitrarily.
+ if (WI.IsSigned != IsSigned)
+ return;
+
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+}
+
+namespace {
+/// WidenIV - The goal of this transform is to remove sign and zero extends
+/// without creating any new induction variables. To do this, it creates a new
+/// phi of the wider type and redirects all users, either removing extends or
+/// inserting truncs whenever we stop propagating the type.
+///
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ const Type *WideType;
+ bool IsSigned;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Result
+ PHINode *WidePhi;
+ Instruction *WideInc;
+ const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+
+public:
+ WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
+ SmallVectorImpl<WeakVH> &DI) :
+ OrigPhi(PN),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
+ LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())),
+ SE(SEv),
+ DT(DTree),
+ WidePhi(0),
+ WideInc(0),
+ WideIncExpr(0),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ }
+
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
+
+protected:
+ Instruction *CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef);
+
+ const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
+ Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
+ Instruction *WideDef);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+};
+} // anonymous namespace
+
+static Value *getExtend( Value *NarrowOper, const Type *WideType,
+ bool IsSigned, IRBuilder<> &Builder) {
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// CloneIVUser - Instantiate a wide operation to replace a narrow
+/// operation. This only needs to handle operations that can evaluate to
+/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
+Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef) {
+ unsigned Opcode = NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return 0;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
+
+ IRBuilder<> Builder(NarrowUse);
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+ // anything about the narrow operand yet so must insert a [sz]ext. It is
+ // probably loop invariant and will be folded or hoisted. If it actually
+ // comes from a widened IV, it should be removed during a future call to
+ // WidenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
+ LHS, RHS,
+ NarrowBO->getName());
+ Builder.Insert(WideBO);
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
+ return WideBO;
+ }
+ llvm_unreachable(0);
+}
+
+/// HoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
+ const DominatorTree *DT) {
+ if (DT->dominates(IncV, InsertPos))
+ return true;
+
+ if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ // Attempt to hoist IncV
+ for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+ OI != OE; ++OI) {
+ Instruction *OInst = dyn_cast<Instruction>(OI);
+ if (OInst && !DT->dominates(OInst, InsertPos))
+ return false;
+ }
+ IncV->moveBefore(InsertPos);
+ return true;
+}
+
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop, with SCEV reducing the value to a recurrence
+// on the same loop? If so, return the sign- or zero-extended recurrence.
+// Otherwise return NULL.
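+//
+// For example (a sketch), sign-extending the i32 addrec {0,+,1}<%L> to i64
+// yields an i64 addrec for the same loop when SCEV can prove the extension is
+// safe; if it cannot, the result is a plain sext expression and null is
+// returned.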
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
+/// WidenIVUse - Determine whether an individual user of the narrow IV can be
+/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
+ Instruction *WideDef) {
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+ return 0;
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
+ Value *NewDef = WideDef;
+ if (NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(NarrowDefUse);
+ NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *NarrowUse << "\n");
+ NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
+ NewDef = NarrowUse;
+ }
+ }
+ if (NewDef != NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
+ << " replaced by " << *WideDef << "\n");
+ ++NumElimExt;
+ NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(NarrowUse);
+ }
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return 0;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+ if (!WideAddRec) {
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ IRBuilder<> Builder(NarrowDefUse);
+ Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
+ NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+ return 0;
+ }
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = 0;
+ if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+ WideUse = WideInc;
+ }
+ else {
+ WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+ if (!WideUse)
+ return 0;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec != SE->getSCEV(WideUse)) {
+ DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
+ << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+ DeadInsts.push_back(WideUse);
+ return 0;
+ }
+
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Use &U = UI.getUse();
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(cast<Instruction>(U.getUser())))
+ continue;
+
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ }
+}
+
+/// CreateWideIV - Process a single induction variable. First use the
+/// SCEVExpander to create a wide induction variable that evaluates to the same
+/// recurrence as the original narrow IV. Then use a worklist to forward
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
+/// interesting IV users, the narrow IV will be isolated for removal by
+/// DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+///
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return NULL;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = IsSigned ?
+ SE->getSignExtendExpr(AddRec, WideType) :
+ SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return NULL;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
+ && "Loop header phi recurrence inputs do not dominate the loop");
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = L->getHeader()->begin();
+ WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ }
+
+ DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ Use *UsePtr;
+ Instruction *WideDef;
+ tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+ Use &NarrowDefUse = *UsePtr;
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+ Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
+ // WidenIVUse may have removed the def-use edge.
+ if (NarrowDef->use_empty())
+ DeadInsts.push_back(NarrowDef);
+ }
+ return WidePhi;
+}
+
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
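+/// EliminateIVComparison - Replace an IV comparison with a constant when SCEV
+/// can prove that the predicate (or its inverse) always holds at this point.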
+void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
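+/// EliminateIVRemainder - Simplify a srem/urem whose numerator is an IV that
+/// SCEV can prove lies in a small enough range relative to the divisor.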
+void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1),
+ "tmp");
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ // Inform IVUsers about the new users.
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem);
+}
+
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extensions until needed, and avoid running other
+ // SCEV-based analyses prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// eliminate the duplicates, keying each phi by its SCEV in ExprToIVMap as it
+/// goes.
+///
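+/// Illustrative example (not part of the original source): two header phis
+///   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
+///   %j = phi i64 [ 0, %preheader ], [ %j.next, %latch ]
+/// whose SCEV is the same expression, {0,+,1}<%loop>, are congruent; %j is
+/// replaced by %i (and %j.next by %i.next when the increment can be hoisted).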
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+ if (OrigInc != IsomorphicInc &&
+ SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+ HoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElimIV;
+ Phi->replaceAllUsesWith(OrigPhi);
+ DeadInsts.push_back(Phi);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forego
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return false;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where
+/// SCEV analysis can determine a loop-invariant trip count, which covers a
+/// much broader class of loops than just those with linear exit tests.
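+///
+/// Illustrative sketch (not part of the original source): when the exiting
+/// block is also the loop latch and SCEV computes the backedge-taken count
+/// as %n - 1, the trip count is %n and the exit branch is rewritten to
+/// compare the incremented canonical IV against it, e.g.
+///   %exitcond = icmp ne i64 %indvar.next, %n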
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+ // If the exiting block is not the same as the backedge block, we must compare
+ // against the pre-incremented value; otherwise we prefer to compare against
+ // the post-incremented value.
+ Value *CmpIndVar;
+ const SCEV *RHS = BackedgeTakenCount;
+ if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+ const SCEV *N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getConstant(IndVar->getType(), 1));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(RHS, L) &&
+ "Computed iteration count is not loop invariant!");
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
+
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ Cond->setDebugLoc(BI->getDebugLoc());
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make old comparison dead.
+ BI->setCondition(Cond);
+ DeadInsts.push_back(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
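+///
+/// Illustrative example (not part of the original source): given
+///   preheader:
+///     %inv = mul i64 %a, %b    ; loop-invariant, unused inside the loop
+///     br label %header
+///   ...
+///   exit:
+///     call void @use(i64 %inv)
+/// the multiply is moved out of the preheader to the start of the exit block.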
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ if (!DisableIVRewrite)
+ IU = &getAnalysis<IVUsers>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ DeadInsts.clear();
+ Changed = false;
+
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ RewriteNonIntegerIVs(L);
+
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+ // Create a rewriter object which we'll use to transform the code with.
+ SCEVExpander Rewriter(*SE, "indvars");
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ if (DisableIVRewrite) {
+ Rewriter.disableCanonicalMode();
+ SimplifyIVUsersNoRewrite(L, Rewriter);
+ }
+
+ // Check to see if this loop has a computable loop-invariant execution count.
+ // If so, this means that we can compute the final value of any expressions
+ // that are recurrent in the loop, and substitute the exit values from the
+ // loop into any instructions outside of the loop that use the final values of
+ // the current expressions.
+ //
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ RewriteLoopExitValues(L, Rewriter);
+
+ // Eliminate redundant IV users.
+ if (!DisableIVRewrite)
+ SimplifyIVUsers(Rewriter);
+
+ // Eliminate redundant IV cycles.
+ if (DisableIVRewrite)
+ SimplifyCongruentIVs(L);
+
+ // Compute the type of the largest recurrence expression, and decide whether
+ // a canonical induction variable should be inserted.
+ const Type *LargestType = 0;
+ bool NeedCannIV = false;
+ bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
+ if (ExpandBECount) {
+ // If we have a known trip count and a single exit block, we'll be
+ // rewriting the loop exit test condition below, which requires a
+ // canonical induction variable.
+ NeedCannIV = true;
+ const Type *Ty = BackedgeTakenCount->getType();
+ if (DisableIVRewrite) {
+ // In this mode, SimplifyIVUsers may have already widened the IV used by
+ // the backedge test and inserted a Trunc on the compare's operand. Get
+ // the wider type to avoid creating a redundant narrow IV only used by the
+ // loop test.
+ LargestType = getBackedgeIVType(L);
+ }
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = SE->getEffectiveSCEVType(Ty);
+ }
+ if (!DisableIVRewrite) {
+ for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ NeedCannIV = true;
+ const Type *Ty =
+ SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = Ty;
+ }
+ }
+
+ // Now that we know the largest of the induction variable expressions
+ // in this loop, insert a canonical induction variable of the largest size.
+ PHINode *IndVar = 0;
+ if (NeedCannIV) {
+ // Check to see if the loop already has any canonical-looking induction
+ // variables. If any are present and wider than the planned canonical
+ // induction variable, temporarily remove them, so that the Rewriter
+ // doesn't attempt to reuse them.
+ SmallVector<PHINode *, 2> OldCannIVs;
+ while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
+ if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+ SE->getTypeSizeInBits(LargestType))
+ OldCannIV->removeFromParent();
+ else
+ break;
+ OldCannIVs.push_back(OldCannIV);
+ }
+
+ IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
+
+ ++NumInserted;
+ Changed = true;
+ DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
+
+ // Now that the official induction variable is established, reinsert
+ // any old canonical-looking variables after it so that the IR remains
+ // consistent. They will be deleted as part of the dead-PHI deletion at
+ // the end of the pass.
+ while (!OldCannIVs.empty()) {
+ PHINode *OldCannIV = OldCannIVs.pop_back_val();
+ OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ }
+ }
+
+ // If we have a trip count expression, rewrite the loop's exit condition
+ // using it. We can currently only handle loops with a single exit.
+ ICmpInst *NewICmp = 0;
+ if (ExpandBECount) {
+ assert(canExpandBackedgeTakenCount(L, SE) &&
+ "canonical IV disrupted BackedgeTaken expansion");
+ assert(NeedCannIV &&
+ "LinearFunctionTestReplace requires a canonical induction variable");
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ NewICmp =
+ LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
+ }
+ // Rewrite IV-derived expressions.
+ if (!DisableIVRewrite)
+ RewriteIVExpressions(L, Rewriter);
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+ // The Rewriter may not be used from this point on.
+
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L);
+
+ // For completeness, inform IVUsers of the IV use in the newly-created
+ // loop exit test instruction.
+ if (NewICmp && IU)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+
+ // Clean up dead instructions.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+ // Check a post-condition.
+ assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
new file mode 100644
index 000000000000..b500d5b4fdff
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,1594 @@
+//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Jump Threading pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jump-threading"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumThreads, "Number of jumps threaded");
+STATISTIC(NumFolds, "Number of terminators folded");
+STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
+
+static cl::opt<unsigned>
+Threshold("jump-threading-threshold",
+ cl::desc("Max block size to duplicate for jump threading"),
+ cl::init(6), cl::Hidden);
+
+namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
+ /// This pass performs 'jump threading', which looks at blocks that have
+ /// multiple predecessors and multiple successors. If one or more of the
+ /// predecessors of the block can be proven to always jump to one of the
+ /// successors, we forward the edge from the predecessor to the successor by
+ /// duplicating the contents of this block.
+ ///
+ /// An example of when this can occur is code like this:
+ ///
+ /// if () { ...
+ /// X = 4;
+ /// }
+ /// if (X < 3) {
+ ///
+ /// In this case, the unconditional branch at the end of the first if can be
+ /// revectored to the false side of the second if.
+ ///
+ class JumpThreading : public FunctionPass {
+ TargetData *TD;
+ LazyValueInfo *LVI;
+#ifdef NDEBUG
+ SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+ SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
+ public:
+ static char ID; // Pass identification
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
+ bool ProcessBlock(BasicBlock *BB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
+ bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
+
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
+
+ bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ };
+}
+
+char JumpThreading::ID = 0;
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ TD = getAnalysisIfAvailable<TargetData>();
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ FindLoopHeaders(F);
+
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
+ while (ProcessBlock(BB))
+ Changed = true;
+
+ ++I;
+
+ // If the block is trivially dead, zap it. This eliminates the successor
+ // edges which simplifies the CFG.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
+ << "' with terminator: " << *BB->getTerminator() << '\n');
+ LoopHeaders.erase(BB);
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ Changed = true;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this block dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
+ EverChanged |= Changed;
+ } while (Changed);
+
+ LoopHeaders.clear();
+ return EverChanged;
+}
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+ // Ignore PHI nodes; these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!CI->getType()->isVectorTy())
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
+ return Size;
+}
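+
+// For illustration of the cost model above (not part of the original source):
+// a block with two ordinary instructions plus one non-intrinsic call costs
+// 2 + 4 = 6; if that block also ends in a switch, the final cost becomes
+// max(6 - 6, 0) = 0, making it very likely to be threaded.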
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops. Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations. To prevent this from
+/// happening, we first have to find the loop headers. Here we approximate this
+/// by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header. For example, threading a jump from outside the
+/// loop (the preheader) to an exit block of the loop is definitely profitable.
+/// It is also almost always profitable to thread backedges from within the loop
+/// to exit blocks, and is often profitable to thread backedges to other blocks
+/// within the loop (forming a nested loop). This simple analysis is not rich
+/// enough to track all of these properties and keep it up-to-date as the CFG
+/// mutates, so we don't allow any of these transformations.
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
+}
+
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
+///
+/// This returns true if there were any known values.
+///
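+/// Illustrative example (not part of the original source): for
+///   %p = phi i1 [ true, %bb1 ], [ %x, %bb2 ]
+/// queried with WantInteger, the result would contain (true, %bb1), plus
+/// (%x's constant, %bb2) whenever LazyValueInfo knows %x is constant on
+/// that edge.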
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them.
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII helper to remove this pair from the recursion set once the
+ // recursion stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ } else {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ PredValueInfoTy LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
+ Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
+ }
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
+ return true;
+ }
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(ResC, P));
+ }
+
+ return !Result.empty();
+ }
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+ }
+
+ return !Result.empty();
+}
+
+
+
+/// GetBestDestForJumpOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors. This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds) {
+ MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
+ }
+
+ return MinSucc;
+}
+
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
+ // If this block has a single predecessor, and if that pred has a single
+ // successor, merge the blocks. This encourages recursive jump threading
+ // because now the condition in this block can be threaded through
+ // predecessors of our predecessor block.
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
+ if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+ return true;
+ }
+ }
+
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
+ Value *Condition;
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
+ // Can't thread an unconditional jump.
+ if (BI->isUnconditional()) return false;
+ Condition = BI->getCondition();
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
+ Condition = SI->getCondition();
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
+ return false; // Must be an invoke.
+ }
+
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
+ // If the terminator is branching on an undef, we can pick any of the
+ // successors to branch to. Let GetBestDestForJumpOnUndef decide.
+ if (isa<UndefValue>(Condition)) {
+ unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
+
+ // Fold the branch/switch.
+ TerminatorInst *BBTerm = BB->getTerminator();
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ if (i == BestSucc) continue;
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
+ }
+
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding undef terminator: " << *BBTerm << '\n');
+ BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
+ BBTerm->eraseFromParent();
+ return true;
+ }
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB, true);
+ return true;
+ }
+
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+ // All the rest of our checks depend on the condition being an instruction.
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (ProcessThreadableEdges(Condition, BB, Preference))
+ return true;
+ return false;
+ }
+
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Use LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For each predecessor edge, determine whether the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
+ }
+
+ // Check for some cases that are worth simplifying. Right now we want to look
+ // for loads that are used by a switch or by the condition for the branch. If
+ // we see one, check to see if it's partially redundant. If so, insert a PHI
+ // which can then be used to thread the values.
+ //
+ Value *SimplifyValue = CondInst;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+ if (isa<Constant>(CondCmp->getOperand(1)))
+ SimplifyValue = CondCmp->getOperand(0);
+
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+ if (SimplifyPartiallyRedundantLoad(LI))
+ return true;
+
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
+ return true;
+
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
+
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
+
+ return false;
+}
+
+
+/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
+/// load instruction, eliminate it by replacing it with a PHI node. This is an
+/// important optimization that encourages jump threading, and needs to be run
+/// interlaced with other jump threading tasks.
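+///
+/// Illustrative sketch (not part of the original source): given
+///   pred1:  store i32 1, i32* %p
+///           br label %bb
+///   pred2:  br label %bb
+///   bb:     %v = load i32* %p
+/// the load is available in %pred1 (value 1) but not in %pred2, so a reload
+/// is inserted at the end of %pred2 and %v is replaced by a phi of the two
+/// values.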
+bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile loads.
+ if (LI->isVolatile()) return false;
+
+ // If the load is defined in a block with exactly one predecessor, it can't be
+ // partially redundant.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ Value *LoadedPtr = LI->getOperand(0);
+
+ // If the loaded operand is defined in the LoadBB, it can't be available.
+ // TODO: Could do simple PHI translation, that would be fun :)
+ if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+ if (PtrOp->getParent() == LoadBB)
+ return false;
+
+ // Scan a few instructions up from the load, to see if it is obviously live at
+ // the entry to its block.
+ BasicBlock::iterator BBIt = LI;
+
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
+ // If the value of the load is locally available within the block, just use
+ // it. This frequently occurs for reg2mem'd allocas.
+ //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+ // If the returned value is the load itself, replace with an undef. This can
+ // only happen in dead loops.
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(AvailableVal);
+ LI->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, if we scanned the whole block and got to the top of the block,
+ // we know the block is locally transparent to the load. If not, something
+ // might clobber its value.
+ if (BBIt != LoadBB->begin())
+ return false;
+
+
+ SmallPtrSet<BasicBlock*, 8> PredsScanned;
+ typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+ AvailablePredsTy AvailablePreds;
+ BasicBlock *OneUnavailablePred = 0;
+
+ // If we got here, the loaded value is transparent through to the start of the
+ // block. Check to see if it is available in any of the predecessor blocks.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *PredBB = *PI;
+
+ // If we already scanned this predecessor, skip it.
+ if (!PredsScanned.insert(PredBB))
+ continue;
+
+ // Scan the predecessor to see if the value is available in the pred.
+ BBIt = PredBB->end();
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ if (!PredAvailable) {
+ OneUnavailablePred = PredBB;
+ continue;
+ }
+
+ // If so, this load is partially redundant. Remember this info so that we
+ // can create a PHI node.
+ AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ }
+
+ // If the loaded value isn't available in any predecessor, it isn't partially
+ // redundant.
+ if (AvailablePreds.empty()) return false;
+
+ // Okay, the loaded value is available in at least one (and maybe all!)
+ // predecessors. If the value is unavailable in more than one unique
+ // predecessor, we want to insert a merge block for those common predecessors.
+ // This ensures that we only have to insert one reload, thus not increasing
+ // code size.
+ BasicBlock *UnavailablePred = 0;
+
+ // If there is exactly one predecessor where the value is unavailable, the
+ // already computed 'OneUnavailablePred' block is it. If it ends in an
+ // unconditional branch, we know that it isn't a critical edge.
+ if (PredsScanned.size() == AvailablePreds.size()+1 &&
+ OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+ UnavailablePred = OneUnavailablePred;
+ } else if (PredsScanned.size() != AvailablePreds.size()) {
+ // Otherwise, we had multiple unavailable predecessors or we had a critical
+ // edge from the one.
+ SmallVector<BasicBlock*, 8> PredsToSplit;
+ SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+ for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+ AvailablePredSet.insert(AvailablePreds[i].first);
+
+ // Add all the unavailable predecessors to the PredsToSplit list.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
+
+ // Split them out to their own block.
+ UnavailablePred =
+ SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
+ "thread-pre-split", this);
+ }
+
+ // If the value isn't available in all predecessors, then there will be
+ // exactly one where it isn't available. Insert a load on that edge and add
+ // it to the AvailablePreds list.
+ if (UnavailablePred) {
+ assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
+ "Can't handle critical edge here!");
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
+ AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ }
+
+ // Now we know that each predecessor of this block has a value in
+ // AvailablePreds, sort them for efficient access as we're walking the preds.
+ array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
+
+ // Create a PHI node at the start of the block for the PRE'd load value.
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
+ PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
+
+ // Insert new entries into the PHI for each predecessor. A single block may
+ // have multiple entries here.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ AvailablePredsTy::iterator I =
+ std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
+ std::make_pair(P, (Value*)0));
+
+ assert(I != AvailablePreds.end() && I->first == P &&
+ "Didn't find entry for predecessor!");
+
+ PN->addIncoming(I->second, I->first);
+ }
+
+ //cerr << "PRE: " << *LI << *PN << "\n";
+
+ LI->replaceAllUsesWith(PN);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
+
+  // Okay, now we know the most popular destination.  If several destinations
+  // are tied for most popular, we need to pick one; the choice is arbitrary,
+  // but it must be deterministic.  Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
+
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
+
+ MostPopularDest = TI->getSuccessor(i);
+ break;
+ }
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
+ return false;
+
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(dbgs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
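+  // For example, if BB ends in 'br i1 %cond, label %T, label %F', then a
+  // predecessor where %cond is known true maps to %T, one where it is known
+  // false maps to %F, and one where it is undef maps to a null destination.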
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ Constant *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (isa<UndefValue>(Val))
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
+ else {
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
+ }
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (PredToDestList.empty())
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
+ }
+
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+  // to the most popular destination.  If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+      // edges to the block.  Factor each of these edges by adding one entry
+      // to PredsToFactor per edge into BB.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
+///
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
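+  // Illustrative sketch (names made up): given
+  //
+  //   Pred:
+  //     br label %BB
+  //   BB:
+  //     %p = phi i1 [ true, %Pred ], [ %x, %Other ]
+  //     br i1 %p, label %T, label %F
+  //
+  // duplicating BB's branch into Pred turns it into 'br i1 true, ...', which
+  // can then be folded to an unconditional branch to %T, while BB still
+  // handles the edge from %Other.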
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+  //   br i1 %Y, ...
+
+ PredValueInfoTy XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+  // value in each predecessor is true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on, true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input.
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, set the computed value to 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
+
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred,
+ DenseMap<Instruction*, Value*> &ValueMap) {
+ for (BasicBlock::iterator PNI = PHIBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(OldPred);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ if (I != ValueMap.end())
+ IV = I->second;
+ }
+
+ PN->addIncoming(IV, NewPred);
+ }
+}
+
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
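/// Schematically (names made up): if PredBB branches to BB, and BB ends in
/// 'br i1 %c, label %SuccBB, label %Other', then after threading PredBB
/// branches to a clone 'BB.thread' of BB's non-PHI instructions, and
/// 'BB.thread' ends in an unconditional 'br label %SuccBB'.  The original BB
/// is kept for its remaining predecessors.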
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB) {
+  // Threading to the same block that we come from would create an infinite
+  // loop.
+ if (SuccBB == BB) {
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
+ << "' to dest BB '" << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
+ }
+
+  // And finally, do it!  Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // And finally, do it!
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ << SuccBB->getName() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n");
+
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
+ BB->getParent(), BB);
+ NewBB->moveAfter(PredBB);
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; !isa<TerminatorInst>(BI); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ // We didn't copy the terminator from BB over to NewBB, because there is now
+ // an unconditional jump to SuccBB. Insert the unconditional jump.
+  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
+ // PHI nodes for NewBB now.
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+  // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc., seeding the
+    // SSAUpdater with the two values we know (the value in BB and in NewBB).
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+
+ // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
+ // NewBB instead of BB. This eliminates predecessors from BB, which requires
+ // us to simplify any PHI nodes in BB.
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredTerm->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB, true);
+ PredTerm->setSuccessor(i, NewBB);
+ }
+
+ // At this point, the IR is fully up to date and consistent. Do a quick scan
+ // over the new instructions and zap any that are constants or dead. This
+ // frequently happens because of phi translation.
+ SimplifyInstructionsInBlock(NewBB, TD);
+
+ // Threaded an edge!
+ ++NumThreads;
+ return true;
+}
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB do so now, this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
+ // If BB is a loop header, then duplicating this block outside the loop would
+  // cause us to transform this into an irreducible loop; don't do this.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ if (DuplicationCost > Threshold) {
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
+ << "' - Cost is too high: " << DuplicationCost << "\n");
+ return false;
+ }
+
+  // And finally, do it!  Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // Okay, we decided to do this! Clone all the instructions in BB onto the end
+ // of PredBB.
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ << PredBB->getName() << "' to eliminate branch on phi. Cost: "
+ << DuplicationCost << " block is:" << *BB << "\n");
+
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
+ // We are going to have to map operands from the original BB block into the
+ // PredBB block. Evaluate PHI nodes in BB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into PredBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; BI != BB->end(); ++BI) {
+ Instruction *New = BI->clone();
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
+ }
+
+ // Check to see if the targets of the branch had PHI nodes. If so, we need to
+ // add entries to the PHI nodes for branch from PredBB now.
+ BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+  // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc., seeding the
+    // SSAUpdater with the two values we know (the value in BB and in PredBB).
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
+ // that we nuked.
+ BB->removePredecessor(PredBB, true);
+
+ // Remove the unconditional branch at the end of the PredBB block.
+ OldPredBranch->eraseFromParent();
+
+ ++NumDupes;
+ return true;
+}
+
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
new file mode 100644
index 000000000000..66add6ca01ee
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -0,0 +1,808 @@
+//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
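+// For example (schematically), LICM rewrites
+//
+//   for (i = 0; i != n; ++i) A[i] = x + y;
+//
+// so that the loop-invariant expression 'x + y' is computed once in the loop
+// preheader rather than on every iteration.
+//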
+// This pass uses alias analysis for two purposes:
+//
+// 1. Moving loop invariant loads and calls out of loops. If we can determine
+// that a load or call inside of a loop never aliases anything stored to,
+// we can hoist it or sink it like any other instruction.
+// 2. Scalar Promotion of Memory - If there is a store instruction inside of
+// the loop, we try to move the store to happen AFTER the loop instead of
+// inside of the loop. This can only happen if a few conditions are true:
+// A. The pointer stored through is loop invariant
+// B. There are no stores or loads in the loop which _may_ alias the
+// pointer. There are no calls in the loop which mod/ref the pointer.
+// If these conditions are true, we can promote the loads and stores in the
+// loop of the pointer to use a temporary alloca'd variable. We then use
+// the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+
+namespace {
+ struct LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved("scalar-evolution");
+ AU.addPreservedID(LoopSimplifyID);
+ }
+
+ bool doFinalization() {
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ return false;
+ }
+
+ private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+
+ // State that is updated as we process loops.
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L);
+
+ /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+ /// dominated by the specified block, and that are in the current loop) in
+ /// reverse depth first order w.r.t the DominatorTree. This allows us to
+ /// visit uses before definitions, allowing us to sink a loop body in one
+ /// pass without iteration.
+ ///
+ void SinkRegion(DomTreeNode *N);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(DomTreeNode *N);
+
+ /// inSubLoop - Little predicate that returns true if the specified basic
+ /// block is in a subloop of the current one, not the current one itself.
+ ///
+ bool inSubLoop(BasicBlock *BB) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ return LI->getLoopFor(BB) != CurLoop;
+ }
+
+ /// sink - When an instruction is found to only be used outside of the loop,
+ /// this function moves it to the exit blocks and patches up SSA form as
+ /// needed.
+ ///
+ void sink(Instruction &I);
+
+    /// hoist - When an instruction that uses only loop invariant operands is
+    /// found to be safe to hoist, this function is called to do the dirty work.
+ ///
+ void hoist(Instruction &I);
+
+ /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
+ /// is not a trapping instruction or if it is a trapping instruction and is
+ /// guaranteed to execute.
+ ///
+ bool isSafeToExecuteUnconditionally(Instruction &I);
+
+ /// pointerInvalidatedByLoop - Return true if the body of this loop may
+ /// store into the memory location pointed to by V.
+ ///
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
+ }
+
+ bool canSinkOrHoistInst(Instruction &I);
+ bool isNotUsedInLoop(Instruction &I);
+
+ void PromoteAliasSet(AliasSet &AS);
+ };
+}
+
+char LICM::ID = 0;
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop.  Note that alias info for the
+/// inner loop is not preserved, so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ Changed = false;
+
+ // Get our Loop and Alias Analysis information...
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ CurAST = new AliasSetTracker(*AA);
+ // Collect Alias info from subloops.
+ for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+ LoopItr != LoopItrE; ++LoopItr) {
+ Loop *InnerL = *LoopItr;
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
+
+    // What if InnerLoop was modified by other passes?
+ CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
+ }
+
+ CurLoop = L;
+
+ // Get the preheader block to move instructions into...
+ Preheader = L->getLoopPreheader();
+
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks in
+ // subloops.
+ //
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
+
+  // We want to visit all of the instructions in this loop... that are not part
+ // of our subloops (they have already had their invariants hoisted out of
+ // their loop, into this loop, so there is no need to process the BODIES of
+ // the subloops).
+ //
+ // Traverse the body of the loop in depth first order on the dominator tree so
+ // that we are guaranteed to see definitions before we see uses. This allows
+ // us to sink instructions in one pass, without iteration. After sinking
+ // instructions, we perform another pass to hoist them out of the loop.
+ //
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
+
+ // Now that all loop invariants have been removed from the loop, promote any
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
+  // Clear out loop state information for the next iteration.
+ CurLoop = 0;
+ Preheader = 0;
+
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
+ return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
+/// uses before definitions, allowing us to sink a loop body in one pass without
+/// iteration.
+///
+void LICM::SinkRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // We are processing blocks in reverse dfo, so process children first.
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ SinkRegion(Children[i]);
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (inSubLoop(BB)) return;
+
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ Instruction &I = *--II;
+
+    // If the instruction is dead, we would normally try to sink it because it
+    // isn't used in the loop; instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can sink this instruction to the exit blocks
+    // of the loop.  We can do this if all of the users of the instruction are
+ // outside of the loop. In this case, it doesn't even matter if the
+ // operands of the instruction are loop invariant.
+ //
+ if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ ++II;
+ sink(I);
+ }
+ }
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void LICM::HoistRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (!inSubLoop(BB))
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ Instruction &I = *II++;
+
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
+ // Try hoisting the instruction out to the preheader. We can only do this
+ // if all of the operands of the instruction are loop invariant and if it
+ // is safe to hoist the instruction.
+ //
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
+ isSafeToExecuteUnconditionally(I))
+ hoist(I);
+ }
+
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ HoistRegion(Children[i]);
+}
+
+/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
+/// instruction.
+///
+bool LICM::canSinkOrHoistInst(Instruction &I) {
+ // Loads have extra constraints we have to verify before we can hoist them.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile())
+ return false; // Don't hoist volatile loads!
+
+ // Loads from constant memory are always safe to move, even if they end up
+ // in the same alias set as something that ends up being modified.
+ if (AA->pointsToConstantMemory(LI->getOperand(0)))
+ return true;
+
+    // Don't hoist loads which have may-aliased stores in the loop.
+ uint64_t Size = 0;
+ if (LI->getType()->isSized())
+ Size = AA->getTypeStoreSize(LI->getType());
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Don't sink or hoist dbg info; it's legal, but not useful.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Handle simple cases by querying alias analysis.
+ AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return true;
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // If this call only reads from memory and there are no writes to memory
+ // in the loop, we can hoist or sink the call as appropriate.
+ bool FoundMod = false;
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ FoundMod = true;
+ break;
+ }
+ }
+ if (!FoundMod) return true;
+ }
+
+ // FIXME: This should use mod/ref information to see if we can hoist or sink
+ // the call.
+
+ return false;
+ }
+
+ // Otherwise these instructions are hoistable/sinkable
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop. If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
+bool LICM::isNotUsedInLoop(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // PHI node uses occur in predecessor blocks!
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I)
+ if (CurLoop->contains(PN->getIncomingBlock(i)))
+ return false;
+ } else if (CurLoop->contains(User)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// This method is guaranteed to remove the original instruction from its
+/// position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumSunk;
+ Changed = true;
+
+ // The case where there is only a single exit node of this loop is common
+ // enough that we handle it as a special (more efficient) case. It is more
+ // efficient to handle because there are no PHI nodes that need to be placed.
+ if (ExitBlocks.size() == 1) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+ // Instruction is not used, just delete it.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Move the instruction to the start of the exit block, after any PHI
+ // nodes in it.
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
+ }
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
+ // The instruction is actually dead if there ARE NO exit blocks.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!DT->dominates(InstOrigBB, ExitBlock))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
+ }
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
+}
+
+/// hoist - When an instruction that uses only loop invariant operands is found
+/// to be safe to hoist, this function is called to do the dirty work.
+///
+void LICM::hoist(Instruction &I) {
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
+ << I << "\n");
+
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumHoisted;
+ Changed = true;
+}
+
+/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
+/// not a trapping instruction or if it is a trapping instruction and is
+/// guaranteed to execute.
+///
+bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
+ // If it is not a trapping instruction, it is always safe to hoist.
+ if (Inst.isSafeToSpeculativelyExecute())
+ return true;
+
+ // Otherwise we have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ return true;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ return false;
+
+ return true;
+}
+
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ AliasSetTracker &AST;
+ DebugLoc DL;
+ int Alignment;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
+ DebugLoc dl, int alignment)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+ Alignment(alignment) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+      // Insert stores in the loop exit blocks.  Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
+ NewSI->setDebugLoc(DL);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
+/// stores out of the loop and moving loads to before the loop. We do this by
+/// looping over the stores in the loop, looking for stores to Must pointers
+/// which are loop invariant.
+///
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
+
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+
+  // Examine each pointer in the alias set, and each use of those pointers
+  // inside the loop.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions that are outside the loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use))
+ continue;
+
+      // If there is a non-load/store instruction in the loop, we can't promote
+ // it.
+ unsigned InstAlignment;
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ InstAlignment = load->getAlignment();
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
+ InstAlignment = store->getAlignment();
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ } else
+ return; // Not a load or store.
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isSafeToExecuteUnconditionally(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
+ }
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+  // Otherwise, this is safe to promote; let's do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
+
+ // Grab a debug location for the inserted loads/stores; given that the
+ // inserted loads/stores have little relation to the original loads/stores,
+ // this code just arbitrarily picks a location from one, since any debug
+ // location is better than none.
+ DebugLoc DL = LoopUses[0]->getDebugLoc();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ *CurAST, DL, Alignment);
+
+  // Set up the preheader to have a definition of the value.  It is the value
+  // flowing in from the preheader that uses in the loop will see.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setDebugLoc(DL);
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
+
+ // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+ if (PreheaderLoad->use_empty())
+ PreheaderLoad->eraseFromParent();
+}
+
+
+/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->copyValue(From, To);
+}
+
+/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+/// set.
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->deleteValue(V);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 000000000000..f7f32981baa7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,248 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+ bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ }
+ };
+}
+
+char LoopDeletion::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+
+Pass* llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// IsLoopDead - Determines if a loop is dead.  This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
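+  // For example (names made up), a value %v defined in the loop and used
+  // outside of it appears in LCSSA form as:
+  //
+  //   exit:
+  //     %v.lcssa = phi i32 [ %v, %exiting ]
+  //
+  // so inspecting the exit block's PHI nodes covers every escaping value.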
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the exit
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value should
+ // be used.
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
+ return false;
+ }
+
+ if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
+ return false;
+
+ ++BI;
+ }
+
+  // Make sure that no instructions in the loop have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
+/// observable behavior of the program other than finite running time. Note
+/// that we ensure this never removes a loop that might be infinite, as doing
+/// so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock* preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ bool Changed = false;
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ return Changed;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ const SCEV *S = SE.getMaxBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return Changed;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoop(L);
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst* TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ int j = P->getBasicBlockIndex(exitingBlock);
+ assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+ P->setIncomingBlock(j, preheader);
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i)
+ P->removeIncomingValue(exitingBlocks[i]);
+ ++BI;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ SmallVector<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI) {
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+ }
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+  // Finally, remove the blocks from LoopInfo.  This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+ Changed = true;
+
+ ++NumDeleted;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 000000000000..a0e41d9a9772
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,634 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases that this kicks in, it can be a significant
+// performance win.
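+//
+// For example (an illustrative sketch), a loop such as
+//
+//   for (i = 0; i != n; ++i)
+//     A[i] = 0;
+//
+// is rewritten into a single memset covering n * sizeof(A[0]) bytes.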
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// We should enhance this to handle negative strides through memory.
+// Alternatively (and perhaps better) we could rely on an earlier pass to force
+// forward iteration through memory, which is generally better for cache
+// behavior. Negative strides *do* happen for memset/memcpy loops.
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const TargetData *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
+
+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (isInstructionTriviallyDead(I))
+ deleteDeadInstruction(I, SE);
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+  // Disable loop idiom recognition if the function's name is a common idiom;
+  // otherwise we could end up replacing the loop in, say, an implementation of
+  // memset with a call to memset itself.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
+ // The trip count of the loop must be analyzable.
+ SE = &getAnalysis<ScalarEvolution>();
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD == 0) return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+  // Reject stores that are not a whole number of bytes in size, or that are so
+  // large that they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+    // TODO: Could also handle negative stride here someday; that will require
+    // the validity check in mayLoopAccessLocation to be updated, though.
+ // Enable this to print exact negative strides.
+ if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
+ dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
+ dbgs() << "BB: " << *SI->getParent();
+ }
+
+ return false;
+ }
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same stride
+  // then this may be transformable into a memcpy.  This kicks in for stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+  // TODO: Could also handle negative stride here someday; that will require
+  // the validity check in mayLoopAccessLocation to be updated, though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. Store to &A[i] of 100 will always return may alias
+  // with store of &A[100]; we need StoreLoc to be "A" with size of 100,
+ // which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
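+///
+/// For example (illustrative), a strided store of the i32 constant 0x01020304
+/// cannot become a plain memset (its bytes differ), but it can become a
+/// memset_pattern16 whose pattern is that 4-byte constant replicated four
+/// times to fill 16 bytes.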
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+  // TODO: If C is larger than 16 bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+    // Otherwise, this isn't an idiom we can transform.  For example, we can't
+    // do anything with a 3-byte store.
+ return false;
+ }
+
+  // The trip count of the loop and the base pointer of the addrec SCEV are
+  // guaranteed to be loop invariant, which means that they should dominate the
+  // header.  This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for any overlap by generating the
+ // base pointer and checking the region.
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+
+ if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(BasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything looks good, insert the memset.
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+    // a constant array of 16 bytes.  Plop the value into a mergeable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(TheStore, *SE);
+ ++NumMemSet;
+ return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+  // The trip count of the loop and the base pointer of the addrec SCEV are
+  // guaranteed to be loop invariant, which means that they should dominate the
+  // header.  This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, "loop-idiom");
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write the memory region we're storing to. This includes the load that
+ // feeds the stores. Check for an alias by generating the base address and
+ // checking everything.
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(LoadBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything is safe, we can transform this!
+
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ CallInst *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+ NewCall->setDebugLoc(SI->getDebugLoc());
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+
+
+  // Okay, the memcpy has been formed.  Zap the original store and anything that
+ // feeds into it.
+ deleteDeadInstruction(SI, *SE);
+ ++NumMemCpy;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 000000000000..af25c5c1a661
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
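+//
+// For example (an illustrative sketch), an instruction in the loop body such
+// as
+//
+//   %a = add i32 %x, 0
+//
+// is replaced by %x at all of its uses and then deleted once it becomes
+// trivially dead.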
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 000000000000..9fd0958fd4c3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,353 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Rotation Pass.
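+//
+// Rotation turns a loop that tests its exit condition in the header (a
+// "while"-style loop) into one that tests it in the latch (a "do-while"-style
+// loop), with a copy of the test guarding entry from the preheader.  As a
+// rough illustrative sketch:
+//
+//   while (cond) { body; }
+//
+// becomes
+//
+//   if (cond)                     // cloned test, now in the old preheader
+//     do { body; } while (cond);  // test moved to the bottom of the loop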
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+ class LoopRotate : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool rotateLoop(Loop *L);
+
+ private:
+ LoopInfo *LI;
+ };
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// the loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+    // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
+
+/// Rotate loop L. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ BasicBlock *OrigHeader = L->getHeader();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return false;
+
+ // Updating PHInodes in loops with multiple exits adds complexity.
+ // Keep it simple, and restrict loop rotation to loops with one exit only.
+  // In the future, lift this restriction and add support for multiple exits
+  // if required.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() > 1)
+ return false;
+
+ // Check size of original header and reject loop if it is very big.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
+ }
+
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it; just give up.
+ if (OrigPreheader == 0 || OrigLatch == 0)
+ return false;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated.
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+  // Find the new loop header.  NewHeader is the header's one and only
+  // successor that is inside the loop; the header's other successor is outside
+  // the loop.  Otherwise the loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable.  This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required.
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Since OrigPreheader now has the conditional branch to Exit block, it is
+ // the dominator of Exit.
+ DT->changeImmediateDominator(Exit, OrigPreheader);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore. Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only one
+ // predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+ }
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ ++NumRotated;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 000000000000..509d0264f10b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,3911 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs strength reduction on array references inside loops that
+// have the loop induction variable as one or more of their components.  It
+// rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
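+//
+// As a rough illustrative sketch (not taken from any particular test), a loop
+// like
+//
+//   for (i = 0; i != n; ++i)
+//     sum += A[i];            // address recomputed as A + i*sizeof(A[0])
+//
+// may be rewritten to step a pointer directly:
+//
+//   for (p = A; p != A + n; ++p)
+//     sum += *p;              // one pointer increment per iteration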
+//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+/// RegSortData - This class holds data which is used to order reuse candidates.
+class RegSortData {
+public:
+ /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// a particular register.
+ SmallBitVector UsedByIndices;
+
+ RegSortData() {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << UsedByIndices.count() << ']';
+}
+
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// RegUseTracker - Map register candidates to information about how they are
+/// used.
+class RegUseTracker {
+ typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+
+ RegUsesTy RegUsesMap;
+ SmallVector<const SCEV *, 16> RegSequence;
+
+public:
+ void CountRegister(const SCEV *Reg, size_t LUIdx);
+ void DropRegister(const SCEV *Reg, size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+
+ bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
+
+ const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
+
+ void clear();
+
+ typedef SmallVectorImpl<const SCEV *>::iterator iterator;
+ typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ iterator begin() { return RegSequence.begin(); }
+ iterator end() { return RegSequence.end(); }
+ const_iterator begin() const { return RegSequence.begin(); }
+ const_iterator end() const { return RegSequence.end(); }
+};
+
+}
+
+void
+RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+ std::pair<RegUsesTy::iterator, bool> Pair =
+ RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
+ RegSortData &RSD = Pair.first->second;
+ if (Pair.second)
+ RegSequence.push_back(Reg);
+ RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
+ RSD.UsedByIndices.set(LUIdx);
+}
+
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+ RegUsesTy::iterator It = RegUsesMap.find(Reg);
+ assert(It != RegUsesMap.end());
+ RegSortData &RSD = It->second;
+ assert(RSD.UsedByIndices.size() > LUIdx);
+ RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
+}
+
+bool
+RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ int i = UsedByIndices.find_first();
+ if (i == -1) return false;
+ if ((size_t)i != LUIdx) return true;
+ return UsedByIndices.find_next(i) != -1;
+}
+
+const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ assert(I != RegUsesMap.end() && "Unknown register!");
+ return I->second.UsedByIndices;
+}
+
+void RegUseTracker::clear() {
+ RegUsesMap.clear();
+ RegSequence.clear();
+}
+
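+// Illustrative sketch of the tracker's semantics (RegUses, R, and the indices
+// below are hypothetical):
+//   RegUses.CountRegister(R, 0);             // use 0 references R
+//   RegUses.CountRegister(R, 2);             // use 2 references R
+//   RegUses.getUsedByIndices(R);             // bit vector with bits 0 and 2 set
+//   RegUses.isRegUsedByUsesOtherThan(R, 0);  // true; use 2 also references R
+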
+namespace {
+
+/// Formula - This class holds information that describes a formula for
+/// computing a value that satisfies a use. It may include broken-out
+/// immediates and scaled registers.
+struct Formula {
+ /// AM - This is used to represent complex addressing, as well as other kinds
+ /// of interesting uses.
+ TargetLowering::AddrMode AM;
+
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty, AM.HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 2> BaseRegs;
+
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when AM.Scale is not zero.
+ const SCEV *ScaledReg;
+
+  /// UnfoldedOffset - An additional constant offset which is added near the
+ /// use. This requires a temporary register, but the offset itself can
+ /// live in an add immediate field rather than a register.
+ int64_t UnfoldedOffset;
+
+ Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+
+ unsigned getNumRegs() const;
+ const Type *getType() const;
+
+ void DeleteBaseReg(const SCEV *&S);
+
+ bool referencesReg(const SCEV *S) const;
+ bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE) {
+ // Collect expressions which properly dominate the loop header.
+ if (SE.properlyDominates(S, L->getHeader())) {
+ Good.push_back(S);
+ return;
+ }
+
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
+ DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ // FIXME: AR->getNoWrapFlags()
+ AR->getLoop(), SCEV::FlagAnyWrap),
+ L, Good, Bad, SE);
+ return;
+ }
+
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
+ }
+
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
+}
+
+/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
+/// attempting to keep all loop-invariant and loop-computable values in a
+/// single base register.
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+ SmallVector<const SCEV *, 4> Good;
+ SmallVector<const SCEV *, 4> Bad;
+ DoInitialMatch(S, L, Good, Bad, SE);
+ if (!Good.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Good);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+ if (!Bad.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Bad);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+}
+
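+// For example (illustrative, with a hypothetical loop %loop and base pointer
+// %base): matching S = {%base,+,4}<%loop> places the loop-invariant start
+// %base into Good and the remaining {0,+,4}<%loop> into Bad, so the initial
+// formula gets two base registers, printed by Formula::print below as
+//   reg(%base) + reg({0,+,4}<%loop>)
+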
+/// getNumRegs - Return the total number of register operands used by this
+/// formula. This does not include register uses implied by non-constant
+/// addrec strides.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
+}
+
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+const Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ AM.BaseGV ? AM.BaseGV->getType() :
+ 0;
+}
+
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+ if (&S != &BaseRegs.back())
+ std::swap(S, BaseRegs.back());
+ BaseRegs.pop_back();
+}
+
+/// referencesReg - Test if this formula references the given register.
+bool Formula::referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+}
+
+/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
+/// which are used by uses other than the use with the given index.
+bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const {
+ if (ScaledReg)
+ if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
+ return true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I)
+ if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ return true;
+ return false;
+}
+
+void Formula::print(raw_ostream &OS) const {
+ bool First = true;
+ if (AM.BaseGV) {
+ if (!First) OS << " + "; else First = false;
+ WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ }
+ if (AM.BaseOffs != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.BaseOffs;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I) {
+ if (!First) OS << " + "; else First = false;
+ OS << "reg(" << **I << ')';
+ }
+ if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: HasBaseReg**";
+ } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: !HasBaseReg**";
+ }
+ if (AM.Scale != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.Scale << "*reg(";
+ if (ScaledReg)
+ OS << *ScaledReg;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ }
+ if (UnfoldedOffset != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << "imm(" << UnfoldedOffset << ')';
+ }
+}
+
+void Formula::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
+}
+
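+// Note on the three helpers above: they ask ScalarEvolution to sign-extend the
+// expression to a wider type and then check whether the result still has the
+// same form (addrec, add, or mul). That form is only preserved when
+// ScalarEvolution can push the extension into the operands, i.e. when it can
+// show that the narrower operation doesn't lose value to overflow.
+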
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
+ // Handle the trivial case, which works for any SCEV type.
+ if (LHS == RHS)
+ return SE.getConstant(LHS->getType(), 1);
+
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
+
+ // Check for a division of a constant by a constant.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
+ if (!RC)
+ return 0;
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
+ return 0;
+ return SE.getConstant(LA.sdiv(RA));
+ }
+
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+      // FlagNW is independent of the start value and step direction, and is
+      // preserved with smaller-magnitude steps.
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
+ }
+ return 0;
+ }
+
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
+ }
+ return 0;
+ }
+
+ // Check for a multiply operand that we can pull RHS out of.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
+ SmallVector<const SCEV *, 4> Ops;
+ bool Found = false;
+ for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
+ I != E; ++I) {
+ const SCEV *S = *I;
+ if (!Found)
+ if (const SCEV *Q = getExactSDiv(S, RHS, SE,
+ IgnoreSignificantBits)) {
+ S = Q;
+ Found = true;
+ }
+ Ops.push_back(S);
+ }
+ return Found ? SE.getMulExpr(Ops) : 0;
+ }
+ return 0;
+ }
+
+ // Otherwise we don't know.
+ return 0;
+}
+
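+// A few illustrative cases (i32 operands, hypothetical %x): dividing
+// {0,+,4}<%loop> by 4 can yield {0,+,1}<%loop> when the addrec is known not to
+// overflow; dividing (8 + %x) by 4 yields null, since %x is not known to be
+// divisible by 4; and with IgnoreSignificantBits, (4 * %x) /s 4 simplifies to
+// %x even though the multiplication might overflow.
+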
+/// ExtractImmediate - If S involves the addition of a constant integer value,
+/// return that integer value, and mutate S to point to a new SCEV with that
+/// value excluded.
+static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ S = SE.getConstant(C->getType(), 0);
+ return C->getValue()->getSExtValue();
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
+/// ExtractSymbol - If S involves the addition of a GlobalValue address,
+/// return that symbol, and mutate S to point to a new SCEV with that
+/// value excluded.
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
+ S = SE.getConstant(GV->getType(), 0);
+ return GV;
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
+ return Result;
+ }
+ return 0;
+}
+
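+// For example (illustrative, with a hypothetical global @gv): applying
+// ExtractImmediate to S = (8 + @gv) returns 8 and leaves S = @gv; applying
+// ExtractSymbol to that returns @gv and leaves S = 0. This is how the
+// SCEV-based isAlwaysFoldable below peels the foldable parts off of an
+// expression.
+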
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getArgOperand(0) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+ const Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getArgOperand(0)->getType();
+ break;
+ }
+ }
+
+ // All pointers have the same requirements, so canonicalize them to an
+ // arbitrary pointer type to minimize variation.
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
+
+ return AccessTy;
+}
+
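+// For example (illustrative): for "store float %v, float* %p" the access type
+// is float, and for "load i32* %p" it is i32; if the accessed type is itself a
+// pointer, it is canonicalized to i1* in the same address space so that all
+// pointer-typed accesses compare equal when uses are grouped.
+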
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+static bool
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+ bool Changed = false;
+
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+namespace {
+
+/// Cost - This class is used to measure and compare candidate formulae.
+class Cost {
+  /// TODO: Some of these could be merged. Also, a lexicographic ordering
+ /// isn't always optimal.
+ unsigned NumRegs;
+ unsigned AddRecCost;
+ unsigned NumIVMuls;
+ unsigned NumBaseAdds;
+ unsigned ImmCost;
+ unsigned SetupCost;
+
+public:
+ Cost()
+ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
+ SetupCost(0) {}
+
+ bool operator<(const Cost &Other) const;
+
+ void Loose();
+
+ void RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+private:
+ void RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+};
+
+}
+
+/// RateRegister - Tally up interesting quantities from the given register.
+void Cost::RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ if (AR->getLoop() == L)
+      AddRecCost += 1; // TODO: This should be a function of the stride.
+
+ // If this is an addrec for a loop that's already been visited by LSR,
+ // don't second-guess its addrec phi nodes. LSR isn't currently smart
+ // enough to reason about more than one loop at a time. Consider these
+ // registers free and leave them alone.
+ else if (L->contains(AR->getLoop()) ||
+ (!AR->getLoop()->contains(L) &&
+ DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return;
+
+ // If this isn't one of the addrecs that the loop already has, it
+ // would require a costly new phi and add. TODO: This isn't
+ // precisely modeled right now.
+ ++NumBaseAdds;
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getStart(), Regs, L, SE, DT);
+ }
+
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
+      if (!Regs.count(AR->getOperand(1)))
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ }
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
+}
+
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (Regs.insert(Reg))
+ RateRegister(Reg, Regs, L, SE, DT);
+}
+
+void Cost::RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Tally up the registers.
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (VisitedRegs.count(ScaledReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (VisitedRegs.count(BaseReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ }
+
+ // Determine how many (unfolded) adds we'll need inside the loop.
+ size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ if (NumBaseParts > 1)
+ NumBaseAdds += NumBaseParts - 1;
+
+ // Tally up the non-zero immediates.
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+ if (F.AM.BaseGV)
+ ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset != 0)
+ ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ }
+}
+
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+ NumRegs = ~0u;
+ AddRecCost = ~0u;
+ NumIVMuls = ~0u;
+ NumBaseAdds = ~0u;
+ ImmCost = ~0u;
+ SetupCost = ~0u;
+}
+
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+ if (NumRegs != Other.NumRegs)
+ return NumRegs < Other.NumRegs;
+ if (AddRecCost != Other.AddRecCost)
+ return AddRecCost < Other.AddRecCost;
+ if (NumIVMuls != Other.NumIVMuls)
+ return NumIVMuls < Other.NumIVMuls;
+ if (NumBaseAdds != Other.NumBaseAdds)
+ return NumBaseAdds < Other.NumBaseAdds;
+ if (ImmCost != Other.ImmCost)
+ return ImmCost < Other.ImmCost;
+ if (SetupCost != Other.SetupCost)
+ return SetupCost < Other.SetupCost;
+ return false;
+}
+
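+// For illustration, the comparison above is purely lexicographic: a solution
+// rated { NumRegs=2, ImmCost=10 } always beats one rated
+// { NumRegs=3, ImmCost=0 }, because register pressure is compared before any
+// of the other components are consulted.
+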
+void Cost::print(raw_ostream &OS) const {
+ OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+ if (AddRecCost != 0)
+ OS << ", with addrec cost " << AddRecCost;
+ if (NumIVMuls != 0)
+ OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+ if (NumBaseAdds != 0)
+ OS << ", plus " << NumBaseAdds << " base add"
+ << (NumBaseAdds == 1 ? "" : "s");
+ if (ImmCost != 0)
+ OS << ", plus " << ImmCost << " imm cost";
+ if (SetupCost != 0)
+ OS << ", plus " << SetupCost << " setup cost";
+}
+
+void Cost::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// LSRFixup - An operand value in an instruction which is to be replaced
+/// with some equivalent, possibly strength-reduced, replacement.
+struct LSRFixup {
+ /// UserInst - The instruction which will be updated.
+ Instruction *UserInst;
+
+ /// OperandValToReplace - The operand of the instruction which will
+ /// be replaced. The operand may be used more than once; every instance
+ /// will be replaced.
+ Value *OperandValToReplace;
+
+ /// PostIncLoops - If this user is to use the post-incremented value of an
+  /// induction variable, this set is non-empty and holds the loops
+  /// associated with the induction variable.
+ PostIncLoopSet PostIncLoops;
+
+ /// LUIdx - The index of the LSRUse describing the expression which
+ /// this fixup needs, minus an offset (below).
+ size_t LUIdx;
+
+ /// Offset - A constant offset to be added to the LSRUse expression.
+ /// This allows multiple fixups to share the same LSRUse with different
+ /// offsets, for example in an unrolled loop.
+ int64_t Offset;
+
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
+ LSRFixup();
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+LSRFixup::LSRFixup()
+ : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
+void LSRFixup::print(raw_ostream &OS) const {
+ OS << "UserInst=";
+ // Store is common and interesting enough to be worth special-casing.
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+ OS << "store ";
+ WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ } else if (UserInst->getType()->isVoidTy())
+ OS << UserInst->getOpcodeName();
+ else
+ WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+
+ OS << ", OperandValToReplace=";
+ WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
+ OS << ", PostIncLoop=";
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ }
+
+ if (LUIdx != ~size_t(0))
+ OS << ", LUIdx=" << LUIdx;
+
+ if (Offset != 0)
+ OS << ", Offset=" << Offset;
+}
+
+void LSRFixup::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 2> getEmptyKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
+ }
+
+ static SmallVector<const SCEV *, 2> getTombstoneKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
+
+ static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ unsigned Result = 0;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+ E = V.end(); I != E; ++I)
+ Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+ return Result;
+ }
+
+ static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+ const SmallVector<const SCEV *, 2> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// KindType - An enum for a kind of use, indicating what types of
+ /// scaled and immediate operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
+
+ KindType Kind;
+ const Type *AccessTy;
+
+ SmallVector<int64_t, 8> Offsets;
+ int64_t MinOffset;
+ int64_t MaxOffset;
+
+ /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+ /// LSRUse are outside of the loop, in which case some special-case heuristics
+ /// may be used.
+ bool AllFixupsOutsideLoop;
+
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
+ /// Formulae - A list of ways to build a value that can satisfy this user.
+ /// After the list is populated, one of these is selected heuristically and
+ /// used to formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
+
+ /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
+
+ LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ MinOffset(INT64_MAX),
+ MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
+
+ bool HasFormulaWithSameRegs(const Formula &F) const;
+ bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+  void RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// HasFormulaWithSameRegs - Test whether this use has a formula which has the
+/// same registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+ return Uniquifier.count(Key);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(const Formula &F) {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ if (!Uniquifier.insert(Key).second)
+ return false;
+
+ // Using a register to hold the value of 0 is not profitable.
+ assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+ "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+ for (SmallVectorImpl<const SCEV *>::const_iterator I =
+ F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+ assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
+
+ // Add the formula to the list.
+ Formulae.push_back(F);
+
+ // Record registers now being used by this use.
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+
+ return true;
+}
+
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+ if (&F != &Formulae.back())
+ std::swap(F, Formulae.back());
+ Formulae.pop_back();
+ assert(!Formulae.empty() && "LSRUse has no formulae left!");
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ Regs.clear();
+ for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+ E = Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+
+ // Update the RegTracker.
+ for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+ E = OldRegs.end(); I != E; ++I)
+ if (!Regs.count(*I))
+ RegUses.DropRegister(*I, LUIdx);
+}
+
+void LSRUse::print(raw_ostream &OS) const {
+ OS << "LSR Use: Kind=";
+ switch (Kind) {
+ case Basic: OS << "Basic"; break;
+ case Special: OS << "Special"; break;
+ case ICmpZero: OS << "ICmpZero"; break;
+ case Address:
+ OS << "Address of ";
+ if (AccessTy->isPointerTy())
+ OS << "pointer"; // the full pointer type could be really verbose
+ else
+ OS << *AccessTy;
+ }
+
+ OS << ", Offsets={";
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ OS << *I;
+ if (llvm::next(I) != E)
+ OS << ',';
+ }
+ OS << '}';
+
+ if (AllFixupsOutsideLoop)
+ OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
+}
+
+void LSRUse::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
+/// be completely folded into the user instruction at isel time. This includes
+/// address-mode folding and special icmp tricks.
+static bool isLegalUse(const TargetLowering::AddrMode &AM,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ switch (Kind) {
+ case LSRUse::Address:
+ // If we have low-level target information, ask the target if it can
+ // completely fold this address.
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+
+ // Otherwise, just guess that reg+reg addressing is legal.
+ return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+
+ case LSRUse::ICmpZero:
+ // There's not even a target hook for querying whether it would be legal to
+ // fold a GV into an ICmp.
+ if (AM.BaseGV)
+ return false;
+
+ // ICmp only has two operands; don't allow more than two non-trivial parts.
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ return false;
+
+ // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
+ // putting the scaled register in the other operand of the icmp.
+ if (AM.Scale != 0 && AM.Scale != -1)
+ return false;
+
+ // If we have low-level target information, ask the target if it can fold an
+ // integer immediate on an icmp.
+ if (AM.BaseOffs != 0) {
+ if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ return false;
+ }
+
+ return true;
+
+ case LSRUse::Basic:
+ // Only handle single-register values.
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+
+ case LSRUse::Special:
+ // Only handle -1 scales, or no scale.
+ return AM.Scale == 0 || AM.Scale == -1;
+ }
+
+ return false;
+}
+
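+// As a rough illustration for the Address kind on an x86-like target,
+// "reg + 4*reg + imm" is typically foldable while "reg + 3*reg" is not, so a
+// formula with AM.Scale == 3 would normally be rejected by the target hook;
+// without target info, only the conservative reg+reg guess above is accepted.
+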
+static bool isLegalUse(TargetLowering::AddrMode AM,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ (MinOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+ AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
+ (MaxOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+ }
+ return false;
+}
+
+static bool isAlwaysFoldable(int64_t BaseOffs,
+ GlobalValue *BaseGV,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ // Canonicalize a scale of 1 to a base register if the formula doesn't
+ // already have a base register.
+ if (!AM.HasBaseReg && AM.Scale == 1) {
+ AM.Scale = 0;
+ AM.HasBaseReg = true;
+ }
+
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+}
+
+static bool isAlwaysFoldable(const SCEV *S,
+ int64_t MinOffset, int64_t MaxOffset,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI,
+ ScalarEvolution &SE) {
+ // Fast-path: zero is always foldable.
+ if (S->isZero()) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ int64_t BaseOffs = ExtractImmediate(S, SE);
+ GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+ // If there's anything else involved, it's not foldable.
+ if (!S->isZero()) return false;
+
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate, a base, and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+}
+
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+ IVUsers &IU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ LoopInfo &LI;
+ const TargetLowering *const TLI;
+ Loop *const L;
+ bool Changed;
+
+  /// IVIncInsertPos - This is the insert position at which the current loop's
+ /// induction variable increment should be placed. In simple loops, this is
+ /// the latch block's terminator. But in more complicated cases, this is a
+ /// position which will dominate all the in-loop post-increment users.
+ Instruction *IVIncInsertPos;
+
+ /// Factors - Interesting factors between use strides.
+ SmallSetVector<int64_t, 8> Factors;
+
+ /// Types - Interesting use types, to facilitate truncation reuse.
+ SmallSetVector<const Type *, 4> Types;
+
+ /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups;
+
+ /// Uses - The list of interesting uses.
+ SmallVector<LSRUse, 16> Uses;
+
+ /// RegUses - Track which uses use which register candidates.
+ RegUseTracker RegUses;
+
+ void OptimizeShadowIV();
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
+ ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
+ void OptimizeLoopTermCond();
+
+ void CollectInterestingTypesAndFactors();
+ void CollectFixupsAndInitialFormulae();
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
+
+ // Support for sharing of LSRUses between LSRFixups.
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
+ UseMapTy UseMap;
+
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy);
+
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ const Type *AccessTy);
+
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
+
+ LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
+public:
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+ void CollectLoopInvariantFixupsAndFormulae();
+
+ void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+ unsigned Depth = 0);
+ void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateCrossUseConstantOffsets();
+ void GenerateAllReuseFormulae();
+
+ void FilterOutUndesirableDedicatedRegisters();
+
+ size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
+ void NarrowSearchSpaceUsingHeuristics();
+
+ void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const;
+ void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+ BasicBlock::iterator
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const;
+
+ Value *Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P);
+
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+ bool getChanged() const { return Changed; }
+
+ void print_factors_and_types(raw_ostream &OS) const;
+ void print_fixups(raw_ostream &OS) const;
+ void print_uses(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+ UI != E; /* empty */) {
+ IVUsers::const_iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ const Type *DestTy = NULL;
+
+    /* If shadow use is an int->float cast then insert a second IV
+ to eliminate this cast.
+
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
+
+ is transformed into
+
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ DestTy = UCast->getDestTy();
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ DestTy = SCast->getDestTy();
+ if (!DestTy) continue;
+
+ if (TLI) {
+      // If the target does not support DestTy natively then do not apply
+      // this transformation.
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
+ }
+
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
+
+ const Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
+
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
+
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
+
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
+
+ if (!C) continue;
+
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
+
+ /* create new increment. '++d' in above example. */
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr);
+
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ Changed = true;
+ break;
+ }
+}
+
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
+ for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+      // NOTE: we could handle setcc instructions with multiple uses here, but
+      // InstCombine does it as well for simple uses; it's not clear that this
+      // occurs often enough in real life to be worth handling.
+ CondUse = UI;
+ return true;
+ }
+ return false;
+}
+
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
+///
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// max = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != max);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analysis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting this type of loop and
+/// rewriting its condition from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return Cond;
+ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
+
+ // Add one to the backedge-taken count to get the trip count.
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
+ return Cond;
+ }
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
+
+ const SCEV *MaxLHS = Max->getOperand(0);
+ const SCEV *MaxRHS = Max->getOperand(1);
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(SE) != One)
+ return Cond;
+
+ assert(AR->getLoop() == L &&
+ "Loop condition operand is an addrec in a different loop!");
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
+ NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
+ else
+ // Max doesn't match expected pattern.
+ return Cond;
+
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ CondUse->setUser(NewCond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
+ Sel->eraseFromParent();
+ if (Cmp->use_empty())
+ Cmp->eraseFromParent();
+ return NewCond;
+}
+
+/// OptimizeLoopTermCond - Change the loop's terminating condition to use the
+/// post-incremented induction variable when possible.
+void
+LSRInstance::OptimizeLoopTermCond() {
+ SmallPtrSet<Instruction *, 4> PostIncs;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+
+ // Get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+    // FIXME: Overly conservative; the termination condition could be an 'or', etc.
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
+
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse))
+ continue;
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ // One consequence of doing this now is that it disrupts the count-down
+ // optimization. That's not always a bad thing though, because in such
+ // cases it may still be worthwhile to avoid a max.
+ Cond = OptimizeMax(Cond, CondUse);
+
+ // If this exiting block dominates the latch block, it may also use
+ // the post-inc value if it won't be shared with other uses.
+ // Check for dominance.
+ if (!DT.dominates(ExitingBlock, LatchBlock))
+ continue;
+
+ // Conservatively avoid trying to use the post-inc value in non-latch
+ // exits if there may be pre-inc users in intervening blocks.
+ if (LatchBlock != ExitingBlock)
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ // Test if the use is reachable from the exiting block. This dominator
+ // query is a conservative approximation of reachability.
+ if (&*UI != CondUse &&
+ !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
+ // Conservatively assume there may be reuse if the quotient of their
+ // strides could be a legal scale.
+ const SCEV *A = IU.getStride(*CondUse, L);
+ const SCEV *B = IU.getStride(*UI, L);
+ if (!A || !B) continue;
+ if (SE.getTypeSizeInBits(A->getType()) !=
+ SE.getTypeSizeInBits(B->getType())) {
+ if (SE.getTypeSizeInBits(A->getType()) >
+ SE.getTypeSizeInBits(B->getType()))
+ B = SE.getSignExtendExpr(B, A->getType());
+ else
+ A = SE.getSignExtendExpr(A, B->getType());
+ }
+ if (const SCEVConstant *D =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ const ConstantInt *C = D->getValue();
+ // Stride of one or negative one can have reuse with non-addresses.
+ if (C->isOne() || C->isAllOnesValue())
+ goto decline_post_inc;
+ // Avoid weird situations.
+ if (C->getValue().getMinSignedBits() >= 64 ||
+ C->getValue().isMinSignedValue())
+ goto decline_post_inc;
+ // Without TLI, assume that any stride might be valid, and so any
+ // use might be shared.
+ if (!TLI)
+ goto decline_post_inc;
+ // Check for possible scaled-address reuse.
+ const Type *AccessTy = getAccessType(UI->getUser());
+ TargetLowering::AddrMode AM;
+ AM.Scale = C->getSExtValue();
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ AM.Scale = -AM.Scale;
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ }
+ }
+
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
+
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != TermBr) {
+ if (Cond->hasOneUse()) {
+ Cond->moveBefore(TermBr);
+ } else {
+        // Clone the terminating condition and insert it before the branch.
+ ICmpInst *OldCond = Cond;
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
+ TermBr->replaceUsesOfWith(OldCond, Cond);
+ }
+ }
+
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->transformToPostInc(L);
+ Changed = true;
+
+ PostIncs.insert(Cond);
+ decline_post_inc:;
+ }
+
+ // Determine an insertion point for the loop induction variable increment. It
+ // must dominate all the post-inc comparisons we just set up, and it must
+ // dominate the loop latch edge.
+ IVIncInsertPos = L->getLoopLatch()->getTerminator();
+ for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
+ E = PostIncs.end(); I != E; ++I) {
+ BasicBlock *BB =
+ DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
+ (*I)->getParent());
+ if (BB == (*I)->getParent())
+ IVIncInsertPos = *I;
+ else if (BB != IVIncInsertPos->getParent())
+ IVIncInsertPos = BB->getTerminator();
+ }
+}
+
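+// Conceptually (illustrative IR), an exit test of the form
+//   %i.next = add i32 %i, 1
+//   %cond = icmp ne i32 %i, %n
+// is transformed so that the compare is rewritten against the post-incremented
+// value %i.next (with the bound adjusted when the use is rewritten), which
+// keeps the pre-increment value from being live across the increment.
+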
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
+bool
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ int64_t NewMinOffset = LU.MinOffset;
+ int64_t NewMaxOffset = LU.MaxOffset;
+ const Type *NewAccessTy = AccessTy;
+
+  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
+  // something conservative; however, this can pessimize the case in which one
+  // of the uses has all of its fixups outside the loop, for example.
+ if (LU.Kind != Kind)
+ return false;
+ // Conservatively assume HasBaseReg is true for now.
+ if (NewOffset < LU.MinOffset) {
+ if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMinOffset = NewOffset;
+ } else if (NewOffset > LU.MaxOffset) {
+ if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMaxOffset = NewOffset;
+ }
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+ // Update the use.
+ LU.MinOffset = NewMinOffset;
+ LU.MaxOffset = NewMaxOffset;
+ LU.AccessTy = NewAccessTy;
+ if (NewOffset != LU.Offsets.back())
+ LU.Offsets.push_back(NewOffset);
+ return true;
+}
+
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind, const Type *AccessTy) {
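+  // Peel a constant offset off the expression, so that uses differing only by
+  // an immediate can share the same LSRUse.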
+ const SCEV *Copy = Expr;
+ int64_t Offset = ExtractImmediate(Expr, SE);
+
+ // Basic uses can't accept any offset, for example.
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ Expr = Copy;
+ Offset = 0;
+ }
+
+ std::pair<UseMapTy::iterator, bool> P =
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ if (!P.second) {
+ // A use already existed with this base.
+ size_t LUIdx = P.first->second;
+ LSRUse &LU = Uses[LUIdx];
+ if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
+ // Reuse this use.
+ return std::make_pair(LUIdx, Offset);
+ }
+
+ // Create a new use.
+ size_t LUIdx = Uses.size();
+ P.first->second = LUIdx;
+ Uses.push_back(LSRUse(Kind, AccessTy));
+ LSRUse &LU = Uses[LUIdx];
+
+ // We don't need to track redundant offsets, but we don't need to go out
+ // of our way here to avoid them.
+ if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+ LU.Offsets.push_back(Offset);
+
+ LU.MinOffset = Offset;
+ LU.MaxOffset = Offset;
+ return std::make_pair(LUIdx, Offset);
+}
+
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
+ if (&LU != &Uses.back())
+ std::swap(LU, Uses.back());
+ Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
+}
+
+/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
+/// a formula with the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+ const LSRUse &OrigLU) {
+ // Search all uses for the formula. This could be more clever.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
+ if (&LU != &OrigLU &&
+ LU.Kind != LSRUse::ICmpZero &&
+ LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
+ LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
+ if (F.BaseRegs == OrigF.BaseRegs &&
+ F.ScaledReg == OrigF.ScaledReg &&
+ F.AM.BaseGV == OrigF.AM.BaseGV &&
+ F.AM.Scale == OrigF.AM.Scale &&
+ F.UnfoldedOffset == OrigF.UnfoldedOffset) {
+ if (F.AM.BaseOffs == 0)
+ return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+            // can skip the rest of the formulae and proceed to the next LSRUse.
+ break;
+ }
+ }
+ }
+ }
+
+ // Nothing looked good.
+ return 0;
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+ SmallSetVector<const SCEV *, 4> Strides;
+
+ // Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ const SCEV *Expr = IU.getExpr(*UI);
+
+ // Collect interesting types.
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.append(Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
+ for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
+ const SCEV *NewStride = *NewStrideIter;
+
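+      // If the strides have different widths, sign-extend the narrower one so
+      // the two can be divided.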
+ if (SE.getTypeSizeInBits(OldStride->getType()) !=
+ SE.getTypeSizeInBits(NewStride->getType())) {
+ if (SE.getTypeSizeInBits(OldStride->getType()) >
+ SE.getTypeSizeInBits(NewStride->getType()))
+ NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
+ else
+ OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
+ }
+ if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ } else if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ }
+ }
+
+ // If all uses use the same type, don't bother looking for truncation-based
+ // reuse.
+ if (Types.size() == 1)
+ Types.clear();
+
+ DEBUG(print_factors_and_types(dbgs()));
+}
+
+void LSRInstance::CollectFixupsAndInitialFormulae() {
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ // Record the uses.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UI->getUser();
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ LF.PostIncLoops = UI->getPostIncLoops();
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ const Type *AccessTy = 0;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (SE.isLoopInvariant(N, L)) {
+ // S is normalized, so normalize N before folding it into S
+ // to keep the result normalized.
+ N = TransformForPostIncUse(Normalize, N, CI, 0,
+ LF.PostIncLoops, SE, DT);
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
+ }
+
+ // Set up the initial formula for this use.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ }
+ }
+
+ DEBUG(print_fixups(dbgs()));
+}
+
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.InitialMatch(S, L, SE);
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Initial formula already exists!"); (void)Inserted;
+}
+
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
+void
+LSRInstance::InsertSupplementalFormula(const SCEV *S,
+ LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.BaseRegs.push_back(S);
+ F.AM.HasBaseReg = true;
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
+}
+
+/// CountRegisters - Note which registers are used by the given formula,
+/// updating RegUses.
+void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
+ if (F.ScaledReg)
+ RegUses.CountRegister(F.ScaledReg, LUIdx);
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I)
+ RegUses.CountRegister(*I, LUIdx);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(F, LUIdx);
+ return true;
+}
+
+/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
+/// loop-invariant values which we're tracking. These other uses will pin these
+/// values in registers, making them less profitable for elimination.
+/// TODO: This currently misses non-constant addrec step registers.
+/// TODO: Should this give more weight to users inside the loop?
+void
+LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
+ SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
+ SmallPtrSet<const SCEV *, 8> Inserted;
+
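+  // Walk the SCEVs of all tracked registers, drilling down to their
+  // SCEVUnknown leaves; loop-invariant leaves get extra fixups below.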
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.append(N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(U)) continue;
+ const Value *V = U->getValue();
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
+ if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ // Ignore non-instructions.
+ if (!UserInst)
+ continue;
+ // Ignore instructions in other functions (as can happen with
+ // Constants).
+ if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
+ continue;
+ // Ignore instructions not dominated by the loop.
+ const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
+ UserInst->getParent() :
+ cast<PHINode>(UserInst)->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ if (!DT.dominates(L->getHeader(), UseBB))
+ continue;
+ // Ignore uses which are part of other SCEV expressions, to avoid
+ // analyzing them multiple times.
+ if (SE.isSCEVable(UserInst->getType())) {
+ const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
+ // If the user is a no-op, look through to its uses.
+ if (!isa<SCEVUnknown>(UserS))
+ continue;
+ if (UserS == U) {
+ Worklist.push_back(
+ SE.getUnknown(const_cast<Instruction *>(UserInst)));
+ continue;
+ }
+ }
+ // Ignore icmp instructions which are already being analyzed.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
+ unsigned OtherIdx = !UI.getOperandNo();
+ Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
+ continue;
+ }
+
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = UI.getUse();
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+ InsertSupplementalFormula(U, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), Uses.size() - 1);
+ break;
+ }
+ }
+ }
+}
+
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers. If C is non-null, multiply each subexpression by C.
+static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ CollectSubexprs(*I, C, Ops, L, SE);
+ return;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (!AR->getStart()->isZero()) {
+ CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap),
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
+ return;
+ }
+ } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ // Break (C * (a + b + c)) into C*a + C*b + C*c.
+ if (Mul->getNumOperands() == 2)
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ CollectSubexprs(Mul->getOperand(1),
+ C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
+ Ops, L, SE);
+ return;
+ }
+ }
+
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+}
+
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base,
+ unsigned Depth) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *BaseReg = Base.BaseRegs[i];
+
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+
+ if (AddOps.size() == 1) continue;
+
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
+
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
+
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (TLI && InnerSumSC &&
+ SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue();
+ F.BaseRegs.erase(F.BaseRegs.begin() + i);
+ } else
+ F.BaseRegs[i] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+ }
+ }
+}
+
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This method is only interesting on a plurality of registers.
+ if (Base.BaseRegs.size() <= 1) return;
+
+ Formula F = Base;
+ F.BaseRegs.clear();
+ SmallVector<const SCEV *, 4> Ops;
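+  // Partition the base registers: registers that dominate the loop and have
+  // no computable evolution in it are collected in Ops to be summed into a
+  // single register; the rest are kept as separate base registers.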
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
+ Ops.push_back(BaseReg);
+ else
+ F.BaseRegs.push_back(BaseReg);
+ }
+ if (Ops.size() > 1) {
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than proceed with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // We can't add a symbolic offset if the address already contains one.
+ if (Base.AM.BaseGV) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ continue;
+ Formula F = Base;
+ F.AM.BaseGV = GV;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // TODO: For now, just add the min and max offset, because it usually isn't
+  // worthwhile looking at everything in between.
+ SmallVector<int64_t, 2> Worklist;
+ Worklist.push_back(LU.MinOffset);
+ if (LU.MaxOffset != LU.MinOffset)
+ Worklist.push_back(LU.MaxOffset);
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+ if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+ LU.Kind, LU.AccessTy, TLI)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ } else
+ F.BaseRegs[i] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ continue;
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ if (LU.Kind != LSRUse::ICmpZero) return;
+
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+ if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+ // Don't do this if there is more than one offset.
+ if (LU.MinOffset != LU.MaxOffset) return;
+
+ assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ // Check that the multiplication doesn't overflow.
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ continue;
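+    // Detect overflow by checking that dividing the product back out recovers
+    // the original base offset.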
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ continue;
+
+ // Check that multiplying with the use offset doesn't overflow.
+ int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
+ Offset = (uint64_t)Offset * Factor;
+ if (Offset / Factor != LU.MinOffset)
+ continue;
+
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
+ // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ continue;
+
+ // Compensate for the use having MinOffset built into it.
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+
+ // Check that multiplying with each base register doesn't overflow.
+ for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
+ F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ goto next;
+ }
+
+ // Check that multiplying with the scaled register doesn't overflow.
+ if (F.ScaledReg) {
+ F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ continue;
+ }
+
+ // Check that multiplying with the unfolded offset doesn't overflow.
+ if (F.UnfoldedOffset != 0) {
+ if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+ continue;
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
+ if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ continue;
+ }
+
+ // If we make it here and it's legal, add it.
+ (void)InsertFormula(LU, LUIdx, F);
+ next:;
+ }
+}
+
+/// GenerateScales - Generate stride factor reuse formulae by making use of
+/// scaled-offset address modes, for example.
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+
+ // If this Formula already has a scaled register, we can't add another one.
+ if (Base.AM.Scale != 0) return;
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ Base.AM.Scale = Factor;
+ Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ // Check whether this scale is going to be legal.
+ if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+      // As a special case, handle out-of-loop Basic users specially.
+ // TODO: Reconsider this special case.
+ if (LU.Kind == LSRUse::Basic &&
+ isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LSRUse::Special, LU.AccessTy, TLI) &&
+ LU.AllFixupsOutsideLoop)
+ LU.Kind = LSRUse::Special;
+ else
+ continue;
+ }
+ // For an ICmpZero, negating a solitary base register won't lead to
+ // new solutions.
+ if (LU.Kind == LSRUse::ICmpZero &&
+ !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ continue;
+ // For each addrec base reg, apply the scale, if possible.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+ if (FactorS->isZero())
+ continue;
+ // Divide out the factor, ignoring high bits, since we'll be
+ // scaling the value back up in the end.
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.DeleteBaseReg(F.BaseRegs[i]);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+ }
+}
+
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
+
+ // Don't bother truncating symbolic values.
+ if (Base.AM.BaseGV) return;
+
+ // Determine the integer type for the base formula.
+ const Type *DstTy = Base.getType();
+ if (!DstTy) return;
+ DstTy = SE.getEffectiveSCEVType(DstTy);
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ const Type *SrcTy = *I;
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ Formula F = Base;
+
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+ for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J)
+ *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+ // TODO: This assumes we've done basic processing on all uses and
+ // have an idea what the register usage is.
+ if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+ continue;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+ size_t LUIdx;
+ int64_t Imm;
+ const SCEV *OrigReg;
+
+ WorkItem(size_t LI, int64_t I, const SCEV *R)
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+ OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+ << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+ // Group the registers by their value without any added constant offset.
+ typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+ RegMapTy Map;
+ DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+ SmallVector<const SCEV *, 8> Sequence;
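+  // Bucket each register by its base value (the register with its constant
+  // offset stripped), recording every immediate seen with that base and which
+  // uses reference it.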
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ int64_t Imm = ExtractImmediate(Reg, SE);
+ std::pair<RegMapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Reg, ImmMapTy()));
+ if (Pair.second)
+ Sequence.push_back(Reg);
+ Pair.first->second.insert(std::make_pair(Imm, *I));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ }
+
+ // Now examine each set of registers with the same base value. Build up
+ // a list of work to do and do the work in a separate step so that we're
+ // not adding formulae and register counts while we're searching.
+ SmallVector<WorkItem, 32> WorkItems;
+ SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+ E = Sequence.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ const ImmMapTy &Imms = Map.find(Reg)->second;
+
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
+ continue;
+
+ DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J)
+ dbgs() << ' ' << J->first;
+ dbgs() << '\n');
+
+ // Examine each offset.
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J) {
+ const SCEV *OrigReg = J->second;
+
+ int64_t JImm = J->first;
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+ if (!isa<SCEVConstant>(OrigReg) &&
+ UsedByIndicesMap[Reg].count() == 1) {
+ DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ continue;
+ }
+
+      // Conservatively examine offsets between this orig reg and a few
+      // selected other orig regs.
+ ImmMapTy::const_iterator OtherImms[] = {
+ Imms.begin(), prior(Imms.end()),
+ Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ };
+ for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+ ImmMapTy::const_iterator M = OtherImms[i];
+ if (M == J || M == JE) continue;
+
+ // Compute the difference between the two.
+ int64_t Imm = (uint64_t)JImm - M->first;
+ for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+ LUIdx = UsedByIndices.find_next(LUIdx))
+ // Make a memo of this use, offset, and register tuple.
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
+ }
+ }
+ }
+
+ Map.clear();
+ Sequence.clear();
+ UsedByIndicesMap.clear();
+ UniqueItems.clear();
+
+ // Now iterate through the worklist and add new formulae.
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+ E = WorkItems.end(); I != E; ++I) {
+ const WorkItem &WI = *I;
+ size_t LUIdx = WI.LUIdx;
+ LSRUse &LU = Uses[LUIdx];
+ int64_t Imm = WI.Imm;
+ const SCEV *OrigReg = WI.OrigReg;
+
+ const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+ // TODO: Use a more targeted data structure.
+ for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+ const Formula &F = LU.Formulae[L];
+ // Use the immediate in the scaled register.
+ if (F.ScaledReg == OrigReg) {
+ int64_t Offs = (uint64_t)F.AM.BaseOffs +
+ Imm * (uint64_t)F.AM.Scale;
+ // Don't create 50 + reg(-50).
+ if (F.referencesReg(SE.getSCEV(
+ ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = Offs;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+ // If the new scale is a constant in a register, and adding the constant
+ // value to the immediate would produce a value closer to zero than the
+ // immediate itself, then the formula isn't worthwhile.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+ if (C->getValue()->isNegative() !=
+ (NewF.AM.BaseOffs < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+ .ule(abs64(NewF.AM.BaseOffs)))
+ continue;
+
+ // OK, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ } else {
+ // Use the immediate in a base register.
+ for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+ const SCEV *BaseReg = F.BaseRegs[N];
+ if (BaseReg != OrigReg)
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+ if (!TLI ||
+ !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ continue;
+ NewF = F;
+ NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+ }
+ NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+ // If the new formula has a constant in a register, and adding the
+ // constant value to the immediate would produce a value closer to
+ // zero than the immediate itself, then the formula isn't worthwhile.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+ J != JE; ++J)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+ abs64(NewF.AM.BaseOffs)) &&
+ (C->getValue()->getValue() +
+ NewF.AM.BaseOffs).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.AM.BaseOffs))
+ goto skip_formula;
+
+ // Ok, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ break;
+ skip_formula:;
+ }
+ }
+ }
+ }
+}
+
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+ // queries are more precise.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+ }
+
+ GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
+}
+
+/// FilterOutUndesirableDedicatedRegisters - If there are multiple formulae
+/// with the same set of registers used by other uses, pick the best one and
+/// delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+#ifndef NDEBUG
+ bool ChangedFormulae = false;
+#endif
+
+ // Collect the best formula for each unique set of shared registers. This
+ // is reset for each use.
+ typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+ bool Any = false;
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+
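+      // Build a key from the registers in this formula which are also used by
+      // other uses; formulae with identical keys compete and only the cheaper
+      // one is kept.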
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+      // An unstable sort by host pointer order is fine here, because the key
+      // is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (!P.second) {
+ Formula &Best = LU.Formulae[P.first->second];
+
+ Cost CostF;
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ Cost CostBest;
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ if (CostF < CostBest)
+ std::swap(F, Best);
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula "; Best.print(dbgs());
+ dbgs() << '\n');
+#ifndef NDEBUG
+ ChangedFormulae = true;
+#endif
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
+ continue;
+ }
+ }
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
+ BestFormulae.clear();
+ }
+
+ DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
+
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prunes the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+ size_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= ComplexityLimit) {
+ Power = ComplexityLimit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= ComplexityLimit)
+ break;
+ }
+ return Power;
+}
+
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ // Look for a formula with a constant or GV in a register. If the use
+ // also has a formula with that same value in an immediate field,
+ // delete the one that uses a register.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+ Formula NewF = F;
+ NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+ if (!F.AM.BaseGV) {
+ Formula NewF = F;
+ NewF.AM.BaseGV = GV;
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
+ "separated by a constant offset will use the same "
+ "registers.\n");
+
+ // This is especially useful for unrolled loops.
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+ if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
+ if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+ /*HasBaseReg=*/false,
+ LU.Kind, LU.AccessTy)) {
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
+ dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(F.AM,
+ LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
+ // With all other options exhausted, loop until the system is simple
+ // enough to handle.
+ SmallPtrSet<const SCEV *, 4> Taken;
+ while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+    // Ok, we have too many formulae on our hands to handle conveniently.
+ // Use a rough heuristic to thin out the list.
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Pick the register which is used by the most LSRUses, which is likely
+ // to be a good reuse register candidate.
+ const SCEV *Best = 0;
+ unsigned BestNum = 0;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ if (Taken.count(Reg))
+ continue;
+ if (!Best)
+ Best = Reg;
+ else {
+ unsigned Count = RegUses.getUsedByIndices(Reg).count();
+ if (Count > BestNum) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
+ Taken.insert(Best);
+
+    // In any use with formulae which reference this register, delete the
+    // formulae which don't reference it.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ if (!LU.Regs.count(Best)) continue;
+
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ if (!F.referencesReg(Best)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --e;
+ --i;
+ Any = true;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
+ continue;
+ }
+ }
+
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const {
+ // Some ideas:
+ // - prune more:
+ // - use more aggressive filtering
+ // - sort the formula so that the most profitable solutions are found first
+ // - sort the uses too
+ // - search faster:
+ // - don't compute a cost, and then compare. compare while computing a cost
+ // and bail early.
+ // - track register sets with SmallBitVector
+
+ const LSRUse &LU = Uses[Workspace.size()];
+
+ // If this use references any register that's already a part of the
+ // in-progress solution, consider it a requirement that a formula must
+ // reference that register in order to be considered. This prunes out
+ // unprofitable searching.
+ SmallSetVector<const SCEV *, 4> ReqRegs;
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+ E = CurRegs.end(); I != E; ++I)
+ if (LU.Regs.count(*I))
+ ReqRegs.insert(*I);
+
+ bool AnySatisfiedReqRegs = false;
+ SmallPtrSet<const SCEV *, 16> NewRegs;
+ Cost NewCost;
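+  // The retry label is re-entered, with ReqRegs cleared, if no formula
+  // satisfied all of the required registers.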
+retry:
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+
+ // Ignore formulae which do not use any of the required registers.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+ JE = ReqRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ F.BaseRegs.end())
+ goto skip;
+ }
+ AnySatisfiedReqRegs = true;
+
+ // Evaluate the cost of the current formula. If it's already worse than
+ // the current best, prune the search at that point.
+ NewCost = CurCost;
+ NewRegs = CurRegs;
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (NewCost < SolutionCost) {
+ Workspace.push_back(&F);
+ if (Workspace.size() != Uses.size()) {
+ SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+ NewRegs, VisitedRegs);
+ if (F.getNumRegs() == 1 && Workspace.size() == 1)
+ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+ } else {
+ DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ". Regs:";
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator
+ I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+ dbgs() << ' ' << **I;
+ dbgs() << '\n');
+
+ SolutionCost = NewCost;
+ Solution = Workspace;
+ }
+ Workspace.pop_back();
+ }
+ skip:;
+ }
+
+ // If none of the formulae had all of the required registers, relax the
+ // constraint so that we don't exclude all formulae.
+ if (!AnySatisfiedReqRegs) {
+ assert(!ReqRegs.empty() && "Solver failed even without required registers");
+ ReqRegs.clear();
+ goto retry;
+ }
+}
+
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+ SmallVector<const Formula *, 8> Workspace;
+ Cost SolutionCost;
+ SolutionCost.Loose();
+ Cost CurCost;
+ SmallPtrSet<const SCEV *, 16> CurRegs;
+ DenseSet<const SCEV *> VisitedRegs;
+ Workspace.reserve(Uses.size());
+
+ // SolveRecurse does all the work.
+ SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+ CurRegs, VisitedRegs);
+
+ // Ok, we've now made all our decisions.
+ DEBUG(dbgs() << "\n"
+ "The chosen solution requires "; SolutionCost.print(dbgs());
+ dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
+
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
+}
+
+/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
+/// the dominator tree as far as we can go while still being dominated by the
+/// input positions. This helps canonicalize the insert position, which
+/// encourages sharing.
+BasicBlock::iterator
+LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs)
+ const {
+ for (;;) {
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ bool AllDominate = true;
+ Instruction *BetterPos = 0;
+ Instruction *Tentative = IDom->getTerminator();
+ for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+ E = Inputs.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+ AllDominate = false;
+ break;
+ }
+ // Attempt to find an insert position in the middle of the block,
+ // instead of at the end, so that it can be used for other expansions.
+ if (IDom == Inst->getParent() &&
+ (!BetterPos || DT.dominates(BetterPos, Inst)))
+ BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ }
+ if (!AllDominate)
+ break;
+ if (BetterPos)
+ IP = BetterPos;
+ else
+ IP = Tentative;
+ }
+
+ return IP;
+}
+
+/// AdjustInsertPositionForExpand - Determine an input position which will be
+/// dominated by the operands and which will dominate the result.
+BasicBlock::iterator
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const {
+ // Collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
+ // will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
+ // The expansion must also be dominated by the increment positions of any
+  // loops for which it is using post-inc mode.
+ for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
+ E = LF.PostIncLoops.end(); I != E; ++I) {
+ const Loop *PIL = *I;
+ if (PIL == L) continue;
+
+ // Be dominated by the loop exit.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ PIL->getExitingBlocks(ExitingBlocks);
+ if (!ExitingBlocks.empty()) {
+ BasicBlock *BB = ExitingBlocks[0];
+ for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
+ BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
+ Inputs.push_back(BB->getTerminator());
+ }
+ }
+
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ IP = HoistInsertPosition(IP, Inputs);
+
+ // Don't insert instructions before PHI nodes.
+ while (isa<PHINode>(IP)) ++IP;
+
+ // Ignore debug intrinsics.
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+
+ return IP;
+}
+
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Determine an input position which will be dominated by the operands and
+ // which will dominate the result.
+ IP = AdjustInsertPositionForExpand(IP, LF, LU);
+
+ // Inform the Rewriter if we have a post-increment use, so that it can
+ // perform an advantageous expansion.
+ Rewriter.setPostInc(LF.PostIncLoops);
+
+ // This is the type that the user actually needs.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ // This will be the type that we'll initially expand to.
+ const Type *Ty = F.getType();
+ if (!Ty)
+ // No type known; just expand directly to the ultimate type.
+ Ty = OpTy;
+ else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+ // Expand directly to the ultimate type if it's the right size.
+ Ty = OpTy;
+ // This is the type to do integer arithmetic in.
+ const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+ // Build up a list of operands to add together to form the full base.
+ SmallVector<const SCEV *, 8> Ops;
+
+ // Expand the BaseRegs portion.
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ }
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the ScaledReg portion.
+ Value *ICmpScaledV = 0;
+ if (F.AM.Scale != 0) {
+ const SCEV *ScaledS = F.ScaledReg;
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.AM.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ } else {
+ // Otherwise just expand the scaled register and an explicit scale,
+ // which is expected to be matched as part of the address.
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+ ScaledS = SE.getMulExpr(ScaledS,
+ SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ Ops.push_back(ScaledS);
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ }
+
+ // Expand the GV portion.
+ if (F.AM.BaseGV) {
+ Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
+ int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ if (Offset != 0) {
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // The other interesting way of "folding" with an ICmpZero is to use a
+ // negated immediate.
+ if (!ICmpScaledV)
+ ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ else {
+ Ops.push_back(SE.getUnknown(ICmpScaledV));
+ ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ }
+ } else {
+ // Just add the immediate values. These again are expected to be matched
+ // as part of the address.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ }
+ }
+
+ // Expand the unfolded offset portion.
+ int64_t UnfoldedOffset = F.UnfoldedOffset;
+ if (UnfoldedOffset != 0) {
+ // Just add the immediate values.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
+ UnfoldedOffset)));
+ }
+
+ // Emit instructions summing all the operands.
+ const SCEV *FullS = Ops.empty() ?
+ SE.getConstant(IntTy, 0) :
+ SE.getAddExpr(Ops);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+ // We're done expanding now, so reset the rewriter.
+ Rewriter.clearPostInc();
+
+ // An ICmpZero Formula represents an ICmp which we're handling as a
+ // comparison against zero. Now that we've expanded an expression for that
+ // form, update the ICmp's other operand.
+ if (LU.Kind == LSRUse::ICmpZero) {
+ ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+ DeadInsts.push_back(CI->getOperand(1));
+ assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ if (F.AM.Scale == -1) {
+ if (ICmpScaledV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+ OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI);
+ ICmpScaledV = Cast;
+ }
+ CI->setOperand(1, ICmpScaledV);
+ } else {
+ assert(F.AM.Scale == 0 &&
+ "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+ -(uint64_t)Offset);
+ if (C->getType() != OpTy)
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ OpTy, false),
+ C, OpTy);
+
+ CI->setOperand(1, C);
+ }
+ }
+
+ return FullV;
+}
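+
+// Illustrative sketch (not from the original source; the formula values are
+// hypothetical): for a use whose winning formula has BaseRegs = {%i},
+// ScaledReg = %j with Scale = 2, and BaseOffs = 4, Expand materializes roughly
+//   %t0   = expansion of %i
+//   %t1   = %t0 + 2 * (expansion of %j)
+//   %full = %t1 + 4
+// flushing the operand list after each piece so that SCEVExpander does not
+// hoist and re-associate the partial sums.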
+
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ // First, find an insertion point that dominates UserInst. For PHI nodes,
+ // find the nearest block which dominates all the relevant uses.
+ if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ } else {
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst);
+ FullV = Cast;
+ }
+
+ // Update the user. ICmpZero is handled specially here (for now) because
+ // Expand may have updated one of the operands of the icmp already, and
+ // its new value may happen to be equal to LF.OperandValToReplace, in
+ // which case doing replaceUsesOfWith leads to replacing both operands
+ // with the same value. TODO: Reorganize this.
+ if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ LF.UserInst->setOperand(0, FullV);
+ else
+ LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
+ }
+
+ DeadInsts.push_back(LF.OperandValToReplace);
+}
+
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P) {
+ // Keep track of instructions we may have made dead, so that
+ // we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ SCEVExpander Rewriter(SE, "lsr");
+ Rewriter.disableCanonicalMode();
+ Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+
+ // Expand the new value definitions and update the users.
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &Fixup = *I;
+
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+
+ Changed = true;
+ }
+
+ // Clean up after ourselves. This must be done before deleting any
+ // instructions.
+ Rewriter.clear();
+
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
+
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()),
+ SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()),
+ LI(P->getAnalysis<LoopInfo>()),
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm()) return;
+
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+ DEBUG(dbgs() << "\nLSR on loop ";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << ":\n");
+
+ // First, perform some low-level loop optimizations.
+ OptimizeShadowIV();
+ OptimizeLoopTermCond();
+
+ // Start collecting data and preparing for the solver.
+ CollectInterestingTypesAndFactors();
+ CollectFixupsAndInitialFormulae();
+ CollectLoopInvariantFixupsAndFormulae();
+
+ DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
+ FilterOutUndesirableDedicatedRegisters();
+ NarrowSearchSpaceUsingHeuristics();
+
+ SmallVector<const Formula *, 8> Solution;
+ Solve(Solution);
+
+ // Release memory that is no longer needed.
+ Factors.clear();
+ Types.clear();
+ RegUses.clear();
+
+#ifndef NDEBUG
+ // Formulae should be legal.
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J)
+ assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI) &&
+ "Illegal formula generated!");
+ }
+#endif
+
+ // Now that we've decided what we want, make it so.
+ ImplementSolution(Solution, P);
+}
+
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+ if (Factors.empty() && Types.empty()) return;
+
+ OS << "LSR has identified the following interesting factors and types: ";
+ bool First = true;
+
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '*' << *I;
+ }
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '(' << **I << ')';
+ }
+ OS << '\n';
+}
+
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+ OS << "LSR is examining the following fixup sites:\n";
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ OS << "  ";
+ I->print(OS);
+ OS << '\n';
+ }
+}
+
+void LSRInstance::print_uses(raw_ostream &OS) const {
+ OS << "LSR is examining the following uses:\n";
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ OS << "  ";
+ LU.print(OS);
+ OS << '\n';
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J) {
+ OS << " ";
+ J->print(OS);
+ OS << '\n';
+ }
+ }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+ print_factors_and_types(OS);
+ print_fixups(OS);
+ print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *const TLI;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
+}
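+
+// Usage note (illustrative, not part of the original file): code generators of
+// this era typically schedule LSR from their common pass setup, e.g. something
+// like PM.add(createLoopStrengthReducePass(getTargetLowering())). Passing the
+// TargetLowering lets isLegalUse query real addressing-mode constraints, while
+// passing null (as plain opt does) falls back to conservative heuristics.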
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(ID), TLI(tli) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ bool Changed = false;
+
+ // Run the main LSR transformation.
+ Changed |= LSRInstance(TLI, L, this).getChanged();
+
+ // At this point, it is worth checking to see if any recurrence PHIs are also
+ // dead, so that we can remove them as well.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
new file mode 100644
index 000000000000..fef6bc31c7b6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,193 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
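+//
+// For illustration (the example loop is hypothetical, not from this file):
+// given a loop with a known trip count of 3, such as
+//   for (i = 0; i != 3; ++i) body(i);
+// a full unroll rewrites it as body(0); body(1); body(2); with the back-edge
+// and the induction variable removed, at the cost of a larger code size.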
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+namespace {
+ class LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+ CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+ UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A magic value for use with the Threshold parameter to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ unsigned CurrentCount;
+ unsigned CurrentThreshold;
+ bool CurrentAllowPartial;
+ bool UserThreshold; // CurrentThreshold is user-specified.
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+ return new LoopUnroll(Threshold, Count, AllowPartial);
+}
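+
+// Usage note (illustrative, not part of the original file): a driver would
+// typically add this pass through the legacy pass manager, e.g. something like
+// PM.add(createLoopUnrollPass(/*Threshold=*/200, /*Count=*/-1,
+// /*AllowPartial=*/-1)); the -1 sentinels make the pass fall back to the
+// -unroll-threshold, -unroll-count and -unroll-allow-partial options above.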
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+ NumCalls = Metrics.NumInlineCandidates;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+ // Don't allow an estimate of size zero. This would allow unrolling of loops
+ // with huge iteration counts, which is a compile time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ unsigned Threshold = CurrentThreshold;
+ if (!UserThreshold &&
+ Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Threshold = OptSizeUnrollThreshold;
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = CurrentCount;
+
+ // Automatically select an unroll count.
+ if (Count == 0) {
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise,
+ // try to find the largest unroll count that evenly divides the trip count
+ // while keeping the unrolled loop size under the threshold.
+ if (TripCount == 0)
+ return false;
+ Count = TripCount;
+ }
+
+ // Enforce the threshold.
+ if (Threshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > Threshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+ if (!CurrentAllowPartial) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ // Reduce the unroll count so that it evenly divides TripCount for partial unrolling.
+ Count = Threshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
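+ // Worked example with illustrative numbers (not taken from the source):
+ // Threshold = 150, LoopSize = 40, TripCount = 10 gives Count = 150/40 = 3;
+ // 10 % 3 != 0, so Count drops to 2, which divides 10, and the body is
+ // emitted twice per iteration with the trip count effectively halved.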
+ if (Count < 2) {
+ DEBUG(dbgs() << " could not unroll partially\n");
+ return false;
+ }
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+ }
+
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*F);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
new file mode 100644
index 000000000000..840c4b69cf06
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1063 @@
+//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops that contain branches on loop-invariant conditions
+// to have multiple loops. For example, it turns the left into the right code:
+//
+// for (...) if (lic)
+// A for (...)
+// if (lic) A; B; C
+// B else
+// C for (...)
+// A; C
+//
+// This can increase the size of the code exponentially (doubling it every time
+// a loop is unswitched) so we only unswitch if the resultant code will be
+// smaller than a threshold.
+//
+// This pass expects LICM to be run before it to hoist invariant conditions out
+// of the loop, to make the unswitching opportunity obvious.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unswitch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects , "Number of selects unswitched");
+STATISTIC(NumTrivial , "Number of unswitches that are trivial");
+STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(50), cl::Hidden);
+
+namespace {
+ class LoopUnswitch : public LoopPass {
+ LoopInfo *LI; // Loop information
+ LPPassManager *LPM;
+
+ // LoopProcessWorklist - Used to check if the second loop needs processing
+ // after RewriteLoopBodyWithConditionConstant rewrites the first loop.
+ std::vector<Loop*> LoopProcessWorklist;
+ SmallPtrSet<Value *,8> UnswitchedVals;
+
+ bool OptimizeForSize;
+ bool redoLoop;
+
+ Loop *currentLoop;
+ DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
+
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+ // NewBlocks contains the cloned copies of the basic blocks from LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool processCurrentLoop();
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ private:
+
+ virtual void releaseMemory() {
+ UnswitchedVals.clear();
+ }
+
+ /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
+ /// remove it.
+ void RemoveLoopFromWorklist(Loop *L) {
+ std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
+ LoopProcessWorklist.end(), L);
+ if (I != LoopProcessWorklist.end())
+ LoopProcessWorklist.erase(I);
+ }
+
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
+ /// Split all of the edges from inside the loop to their exit blocks.
+ /// Update the appropriate Phi nodes as we do so.
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+
+ void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val, bool isEqual);
+
+ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt);
+
+ void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+ void RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist, Loop *l);
+ void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
+ };
+}
+char LoopUnswitch::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
+}
+
+/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
+/// invariant in the loop, or has an invariant piece, return the invariant.
+/// Otherwise, return null.
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+ // We can never unswitch on vector conditions.
+ if (Cond->getType()->isVectorTy())
+ return 0;
+
+ // Constants should be folded, not unswitched on!
+ if (isa<Constant>(Cond)) return 0;
+
+ // TODO: Handle: br (VARIANT|INVARIANT).
+
+ // Hoist simple values out.
+ if (L->makeLoopInvariant(Cond, Changed))
+ return Cond;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
+ if (BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or) {
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ return LHS;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ return RHS;
+ }
+
+ return 0;
+}
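+
+// Illustrative example (hypothetical IR, not from this file): for a header
+// branch on %c where %c = and i1 %inv, %var and only %inv is loop-invariant,
+// the recursion above returns %inv. Unswitching on %inv lets the 'and' fold to
+// false (and the branch disappear) in the clone where %inv is false, and
+// reduces %c to %var in the clone where %inv is true.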
+
+bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ LI = &getAnalysis<LoopInfo>();
+ LPM = &LPM_Ref;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ currentLoop = L;
+ Function *F = currentLoop->getHeader()->getParent();
+ bool Changed = false;
+ do {
+ assert(currentLoop->isLCSSAForm(*DT));
+ redoLoop = false;
+ Changed |= processCurrentLoop();
+ } while(redoLoop);
+
+ if (Changed) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DT)
+ DT->runOnFunction(*F);
+ }
+ return Changed;
+}
+
+/// processCurrentLoop - Do the actual work: unswitch the loop if it is possible
+/// and profitable to do so.
+bool LoopUnswitch::processCurrentLoop() {
+ bool Changed = false;
+ LLVMContext &Context = currentLoop->getHeader()->getContext();
+
+ // Loop over all of the basic blocks in the loop. If we find an interior
+ // block that is branching on a loop-invariant condition, we can unswitch this
+ // loop.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // If this isn't branching on an invariant condition, we can't unswitch
+ // it.
+ if (BI->isConditional()) {
+ // See if this, or some part of it, is loop invariant. If so, we can
+ // unswitch on it if we desire.
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumBranches;
+ return true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && SI->getNumCases() > 1) {
+ // Find a value to unswitch on:
+ // FIXME: this should choose the most expensive case!
+ // FIXME: scan for a case with a non-critical edge?
+ Constant *UnswitchVal = SI->getCaseValue(1);
+ // Do not process same value again and again.
+ if (!UnswitchedVals.insert(UnswitchVal))
+ continue;
+
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+ ++NumSwitches;
+ return true;
+ }
+ }
+ }
+
+ // Scan the instructions to check for unswitchable values.
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ BBI != E; ++BBI)
+ if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumSelects;
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
+///
+/// If so, return true and set ExitBB to the block the loop exits through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+ BasicBlock *&ExitBB,
+ std::set<BasicBlock*> &Visited) {
+ if (!Visited.insert(BB).second) {
+ // Already visited. Without more analysis, this could indicate an infinite loop.
+ return false;
+ } else if (!L->contains(BB)) {
+ // Otherwise, this is a loop exit; that is fine so long as it is the
+ // first exit.
+ if (ExitBB != 0) return false;
+ ExitBB = BB;
+ return true;
+ }
+
+ // Otherwise, this is an unvisited intra-loop node. Check all successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+ // Check to see if the successor is a trivial loop exit.
+ if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+ return false;
+ }
+
+ // Okay, everything after this looks good, check to make sure that this block
+ // doesn't include any side effects.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+
+ return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process. If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+ std::set<BasicBlock*> Visited;
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
+ BasicBlock *ExitBB = 0;
+ if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+ return ExitBB;
+ return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all. If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false. When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
+ TerminatorInst *HeaderTerm = Header->getTerminator();
+ LLVMContext &Context = Header->getContext();
+
+ BasicBlock *LoopExitBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+ // If the header block doesn't end with a conditional branch on Cond, we
+ // can't handle it.
+ if (!BI->isConditional() || BI->getCondition() != Cond)
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ if (Val) *Val = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ if (Val) *Val = ConstantInt::getFalse(Context);
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+ // If this isn't a switch on Cond, we can't handle it.
+ if (SI->getCondition() != Cond) return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a single exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this. Note that we can't trivially unswitch on the default case.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ SI->getSuccessor(i)))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ if (Val) *Val = SI->getCaseValue(i);
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit block
+ // contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ if (LoopExit) *LoopExit = LoopExitBB;
+
+ // We already know that nothing uses any scalar values defined inside of this
+ // loop. As such, we just have to check to see if this loop will execute any
+ // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+ // part of the loop that the code *would* execute. We already checked the
+ // tail, check the header now.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+ return true;
+}
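+
+// Illustrative example (hypothetical loop, not from this file): in
+//   for (...) { if (!flag) break; ...body... }
+// where the header tests the invariant 'flag' and the exit path has no side
+// effects, the condition is trivial: unswitching only hoists the 'flag' test in
+// front of the loop and short-circuits to the exit block, duplicating no code.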
+
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
+/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// unswitch the loop, reprocess the pieces, then return true.
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ Function *F = loopHeader->getParent();
+
+ Constant *CondVal = 0;
+ BasicBlock *ExitBlock = 0;
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ // If the condition is trivial, always unswitch. There is no code growth
+ // for this case.
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ return true;
+ }
+
+ // Check to see if it would be profitable to unswitch current loop.
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+ if (Metrics.NumInsts > Threshold ||
+ Metrics.NumBlocks * 5 > Threshold ||
+ Metrics.containsIndirectBr || Metrics.isRecursive) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n");
+ return false;
+ }
+
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ return true;
+}
+
+/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop *New = new Loop();
+ LPM->insertLoop(New, PL);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+
+ // Add all of the subloops to the new loop.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ CloneLoop(*I, New, VM, LI, LPM);
+
+ return New;
+}
+
+/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values:
+/// if LIC == Val, branch to TrueDest, otherwise branch to FalseDest. Insert the
+/// code immediately before InsertPt.
+void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt) {
+ // Insert a conditional branch on LIC to the two preheaders. The original
+ // code is the true version and the new code is the false version.
+ Value *BranchVal = LIC;
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ else if (Val != ConstantInt::getTrue(Val->getContext()))
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+
+ // Insert the new branch.
+ BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+ // If either edge is critical, split it. This helps preserve LoopSimplify
+ // form for enclosing loops.
+ SplitCriticalEdge(BI, 0, this);
+ SplitCriticalEdge(BI, 1, this);
+}
+
+/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
+/// condition in it (a cond branch from its header block to its latch block,
+/// where the path through the loop that doesn't execute its body has no
+/// side-effects), unswitch it. This doesn't involve any code duplication, just
+/// moving the conditional branch outside of the loop and updating loop info.
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
+ BasicBlock *ExitBlock) {
+ DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n");
+
+ // First step, split the preheader, so that we know that there is a safe place
+ // to insert the conditional branch. We will change loopPreheader to have a
+ // conditional branch on Cond.
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we should
+ // short-circuit to.
+
+ // Split this block now, so that the loop maintains its exit block, and so
+ // that the jump from the preheader can execute the contents of the exit block
+ // without actually branching to it (the exit block should be dominated by the
+ // loop header, not the preheader).
+ assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+
+ // Okay, now we have a position to branch from and a position to branch to,
+ // insert the new conditional branch.
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
+
+ // We need to reprocess this loop, it could be unswitched again.
+ redoLoop = true;
+
+ // Now that we know that the loop is never entered when this condition is a
+ // particular value, rewrite the loop with this info. We know that this will
+ // at least eliminate the old branch.
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+ ++NumTrivial;
+}
+
+/// SplitExitEdges - Split all of the edges from inside the loop to their exit
+/// blocks. Update the appropriate Phi nodes as we do so.
+void LoopUnswitch::SplitExitEdges(Loop *L,
+ const SmallVector<BasicBlock *, 8> &ExitBlocks){
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+ // general, if we call it on all predecessors of all exits then it does.
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ }
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equals Val. Split it into two loop versions and emit a
+/// test of the condition outside of both loops to select between them.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+ Loop *L) {
+ Function *F = loopHeader->getParent();
+ DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n");
+
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ LoopBlocks.clear();
+ NewBlocks.clear();
+
+ // First step, split the preheader and exit blocks, and add these blocks to
+ // the LoopBlocks list.
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ LoopBlocks.push_back(NewPreheader);
+
+ // We want the loop to come after the preheader, but before the exit blocks.
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Split all of the edges from inside the loop to their exit blocks. Update
+ // the appropriate Phi nodes as we do so.
+ SplitExitEdges(L, ExitBlocks);
+
+ // The exit blocks may have been changed due to edge splitting, recompute.
+ ExitBlocks.clear();
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Add exit blocks to the loop blocks.
+ LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+ // Next step, clone all of the basic blocks that make up the loop (including
+ // the loop preheader and exit blocks), keeping track of the mapping between
+ // the instructions and blocks.
+ NewBlocks.reserve(LoopBlocks.size());
+ ValueToValueMapTy VMap;
+ for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+ NewBlocks.push_back(NewBB);
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
+ }
+
+ // Splice the newly inserted blocks into the function right before the
+ // original preheader.
+ F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Now we create the new Loop object for the versioned loop.
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+ Loop *ParentLoop = L->getParentLoop();
+ if (ParentLoop) {
+ // Make sure to add the cloned preheader and exit blocks to the parent loop
+ // as well.
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ }
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
+ // The new exit block should be in the same loop as the old one.
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+ ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+
+ assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+ "Exit block should have been split to have one successor!");
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+ // If the successor of the exit block had PHI nodes, add an entry for
+ // NewExit.
+ PHINode *PN;
+ for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+ PN = cast<PHINode>(I);
+ Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ ValueToValueMapTy::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ PN->addIncoming(V, NewExit);
+ }
+ }
+
+ // Rewrite the code to refer to itself.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // Rewrite the original preheader to select between versions of the loop.
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+ "Preheader splitting did not work correctly!");
+
+ // Emit the new branch that selects between the two versions of this loop.
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ LPM->deleteSimpleAnalysisValue(OldBR, L);
+ OldBR->eraseFromParent();
+
+ LoopProcessWorklist.push_back(NewLoop);
+ redoLoop = true;
+
+ // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // deletes the instruction (for example by simplifying a PHI that feeds into
+ // the condition that we're unswitching on), we don't rewrite the second
+ // iteration.
+ WeakVH LICHandle(LIC);
+
+ // Now we rewrite the original code to know that the condition is true and the
+ // new code to know that the condition is false.
+ RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If it's a constant, don't simplify
+ // it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+}
+
+/// RemoveFromWorklist - Remove all instances of I from the worklist vector
+/// specified.
+static void RemoveFromWorklist(Instruction *I,
+ std::vector<Instruction*> &Worklist) {
+ std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
+ Worklist.end(), I);
+ while (WI != Worklist.end()) {
+ unsigned Offset = WI-Worklist.begin();
+ Worklist.erase(WI);
+ WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
+ }
+}
+
+/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// program, replacing all uses with V and update the worklist.
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
+ std::vector<Instruction*> &Worklist,
+ Loop *L, LPPassManager *LPM) {
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+
+ // Add users to the worklist which may be simplified now.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ ++NumSimplify;
+}
+
+/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
+/// information, and remove any dead successors it has.
+///
+void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist,
+ Loop *L) {
+ if (pred_begin(BB) != pred_end(BB)) {
+ // This block isn't dead. Since an edge to BB was just removed, see if there
+ // are any easy simplifications we can do now.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If it has one pred, fold phi nodes in BB.
+ while (isa<PHINode>(BB->begin()))
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
+ Worklist, L, LPM);
+
+ // If this is the header of a loop and the only pred is the latch, we now
+ // have an unreachable loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ if (loopHeader == BB && L->contains(Pred)) {
+ // Remove the branch from the latch to the header block, this makes
+ // the header dead, which will make the latch dead (because the header
+ // dominates the latch).
+ LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
+ Pred->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), Pred);
+
+ // The loop is now broken, remove it from LI.
+ RemoveLoopFromHierarchy(L);
+
+ // Reprocess the header, which now IS dead.
+ RemoveBlockIfDead(BB, Worklist, L);
+ return;
+ }
+
+ // If the pred ends in an uncond branch, add the uncond branch to the
+ // worklist so that the two blocks will get merged.
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isUnconditional())
+ Worklist.push_back(BI);
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Nuking dead block: " << *BB);
+
+ // Remove the instructions in the basic block from the worklist.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ RemoveFromWorklist(I, Worklist);
+
+ // Anything that uses the instructions in this basic block should have its
+ // uses replaced with undefs.
+ // If I is not of void type, then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // If this is the edge to the header block for a loop, remove the loop and
+ // promote all subloops.
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ if (BBLoop->getLoopLatch() == BB) {
+ RemoveLoopFromHierarchy(BBLoop);
+ if (currentLoop == BBLoop) {
+ currentLoop = 0;
+ redoLoop = false;
+ }
+ }
+ }
+
+ // Remove the block from the loop info, which removes it from any loops it
+ // was in.
+ LI->removeBlock(BB);
+
+
+ // Remove phi node entries in successors for this block.
+ TerminatorInst *TI = BB->getTerminator();
+ SmallVector<BasicBlock*, 4> Succs;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Succs.push_back(TI->getSuccessor(i));
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+
+ // Unique the successors, removing any duplicate entries.
+ array_pod_sort(Succs.begin(), Succs.end());
+ Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
+
+ // Remove the basic block, including all of the instructions contained in it.
+ LPM->deleteSimpleAnalysisValue(BB, L);
+ BB->eraseFromParent();
+ // Remove successor blocks here that are not dead, so that we know we only
+ // have dead blocks in this list. Nondead blocks have a way of becoming dead,
+ // then getting removed before we revisit them, which is badness.
+ //
+ for (unsigned i = 0; i != Succs.size(); ++i)
+ if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
+ // One exception is loop headers. If this block was the preheader for a
+ // loop, then we DO want to visit the loop so the loop gets deleted.
+ // We know that if the successor is a loop header, that this loop had to
+ // be the preheader: the case where this was the latch block was handled
+ // above and headers can only have two predecessors.
+ if (!LI->isLoopHeader(Succs[i])) {
+ Succs.erase(Succs.begin()+i);
+ --i;
+ }
+ }
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ RemoveBlockIfDead(Succs[i], Worklist, L);
+}
+
+/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
+/// become unwrapped, either because the backedge was deleted, or because the
+/// edge into the header was removed. If the edge into the header from the
+/// latch block was removed, the loop is unwrapped but its subloops are still
+/// alive, so this just reparents the subloops. If the loops are actually dead,
+/// they will be removed later.
+void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
+ LPM->deleteLoopFromQueue(L);
+ RemoveLoopFromWorklist(L);
+}
+
+// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
+// the value specified by Val in the specified loop, or we know it does NOT have
+// that value. Rewrite any uses of LIC or of properties correlated to it.
+void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val,
+ bool IsEqual) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // FIXME: Support correlated properties, like:
+ // for (...)
+ // if (li1 < li2)
+ // ...
+ // if (li1 > li2)
+ // ...
+
+ // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
+ // selects, switches.
+ std::vector<Instruction*> Worklist;
+ LLVMContext &Context = Val->getContext();
+
+
+ // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
+ // in the loop with the appropriate one directly.
+ if (IsEqual || (isa<ConstantInt>(Val) &&
+ Val->getType()->isIntegerTy(1))) {
+ Value *Replacement;
+ if (IsEqual)
+ Replacement = Val;
+ else
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ !cast<ConstantInt>(Val)->getZExtValue());
+
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+ U->replaceUsesOfWith(LIC, Replacement);
+ Worklist.push_back(U);
+ }
+ SimplifyCode(Worklist, L);
+ return;
+ }
+
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+
+ Worklist.push_back(U);
+
+ // TODO: We could do other simplifications, for example, turning
+ // 'icmp eq LIC, Val' -> false.
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+ unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ if (DeadCase == 0) continue; // Default case is live for multiple values.
+
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry; those PHI nodes may
+ // be in the Users list.
+
+ BasicBlock *Switch = SI->getParent();
+ BasicBlock *SISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
+ // If the DeadCase successor dominates the loop latch, then the
+ // transformation isn't safe since it will delete the sole predecessor edge
+ // to the latch.
+ if (Latch && DT->dominates(SISucc, Latch))
+ continue;
+
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+ // loop structure and put the block on a dead code path.
+ SplitEdge(Switch, SISucc, this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
+ }
+
+ SimplifyCode(Worklist, L);
+}
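+
+// Illustrative sketch of the dead-case rewrite above (the switch value and
+// case label are hypothetical): given "switch i32 %LIC ... i32 7, label
+// %case7" inside the loop, where unswitching has established %LIC != 7, the
+// Switch->%case7 edge is split and the new block is pointed at a fresh
+// "us-unreachable" block while keeping a dead CFG edge to the old successor:
+//
+//   NewSISucc:
+//     br i1 true, label %us-unreachable, label %case7
+//   us-unreachable:
+//     unreachable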
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible. Note that this is effectively a very simple loop-structure-aware
+/// optimizer. During processing of this loop, L could very well be deleted, so
+/// it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ // Simple DCE.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(dbgs() << "Remove dead instruction '" << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ // See if instruction simplification can hack this up. This is common for
+ // things like "select false, X, Y" after unswitching made the condition be
+ // 'false'.
+ if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
+ // Special case hacks that appear commonly in unswitched code.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (BI->isUnconditional()) {
+ // If BI's parent is the only pred of the successor, fold the two blocks
+ // together.
+ BasicBlock *Pred = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+ BasicBlock *SinglePred = Succ->getSinglePredecessor();
+ if (!SinglePred) continue; // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken");
+
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n");
+
+ // Resolve any single entry PHI nodes in Succ.
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
+ // Move all of the successor contents from Succ to Pred.
+ Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
+ Succ->end());
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+
+ // Remove Succ from the loop tree.
+ LI->removeBlock(Succ);
+ LPM->deleteSimpleAnalysisValue(Succ, L);
+ Succ->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ // Conditional branch. Turn it into an unconditional branch, then
+ // remove dead blocks.
+ continue; // FIXME: Enable.
+
+ DEBUG(dbgs() << "Folded branch: " << *BI);
+ BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
+ BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
+ DeadSucc->removePredecessor(BI->getParent(), true);
+ Worklist.push_back(BranchInst::Create(LiveSucc, BI));
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+ ++NumSimplify;
+
+ RemoveBlockIfDead(DeadSucc, Worklist, L);
+ }
+ continue;
+ }
+ }
+}
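+
+// Illustrative sketch of the unconditional-branch folding above (labels
+// hypothetical): when a branch targets a block with a single predecessor,
+//
+//   pred:
+//     ...
+//     br label %succ
+//   succ:
+//     %x = add i32 %a, 1
+//     ret i32 %x
+//
+// the contents of %succ are spliced into %pred and %succ is deleted, leaving
+// a single block that ends in "ret i32 %x".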
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 000000000000..9087b46c138b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,139 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> Builder(II->getParent(), II);
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig, Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
+ Value *Val = II->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+ II->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(II->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ II->eraseFromParent();
+
+ return true;
+}
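+
+// Illustrative sketch of the lowering above, assuming i32 operands and the
+// old-style atomic intrinsics:
+//
+//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %d)
+//
+// becomes the plain, non-atomic sequence
+//
+//   %old = load i32* %p
+//   %new = add i32 %old, %d
+//   store i32 %new, i32* %p
+//
+// with uses of the intrinsic's result rewritten to use the loaded value.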
+
+namespace {
+ struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {
+ initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
+ Changed |= LowerAtomicIntrinsic(II);
+ return Changed;
+ }
+ };
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false)
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
new file mode 100644
index 000000000000..7ed3db6cc1db
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,986 @@
+//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs various transformations related to eliminating memcpy
+// calls, or transforming sets of stores into memset's.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memcpyopt"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
+
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+ bool &VariableIdxFound, const TargetData &TD){
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (OpC == 0)
+ return VariableIdxFound = true;
+ if (OpC->isZero()) continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset. For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+ const TargetData &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base.  After that base, they may have some number of common (and
+ // potentially variable) indices.  After those, each may have some constant
+ // offset, which determines their offset from each other.  We handle no other
+ // case at this point.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return false;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ if (VariableIdxFound) return false;
+
+ Offset = Offset2-Offset1;
+ return true;
+}
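+
+// Worked example for the &A[42]/&A[40] case above, assuming A is an array of
+// i32:
+//
+//   %p1 = getelementptr [64 x i32]* %A, i64 0, i64 42    ; byte offset 168
+//   %p2 = getelementptr [64 x i32]* %A, i64 0, i64 40    ; byte offset 160
+//
+// The common leading index is skipped, the differing trailing indices give
+// Offset1 = 168 and Offset2 = 160, and the reported Offset is 160 - 168 = -8.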
+
+
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+/// store 0 -> P+1
+/// store 0 -> P+0
+/// store 0 -> P+3
+/// store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc. When we see
+/// the first store, we make a range [1, 2). The second store extends the range
+/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+ // Start/End - A semi range that describes the span that this range covers.
+ // The range is closed at the start and open at the end: [Start, End).
+ int64_t Start, End;
+
+ /// StartPtr - The getelementptr instruction that points to the start of the
+ /// range.
+ Value *StartPtr;
+
+ /// Alignment - The known alignment of the first store.
+ unsigned Alignment;
+
+ /// TheStores - The actual stores that make up this range.
+ SmallVector<Instruction*, 16> TheStores;
+
+ bool isProfitableToUseMemset(const TargetData &TD) const;
+
+};
+} // end anon namespace
+
+bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
+ // If we found more than 8 stores to merge or 64 bytes, use memset.
+ if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
+
+ // Assume that the code generator is capable of merging pairs of stores
+ // together if it wants to.
+ if (TheStores.size() == 2) return false;
+
+ // If we have fewer than 8 stores, it can still be worthwhile to do this.
+ // For example, merging 4 i8 stores into an i32 store is useful almost always.
+ // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
+ // memset will be split into 2 32-bit stores anyway) and doing so can
+ // pessimize the llvm optimizer.
+ //
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // the maximum GPR width is the same size as the pointer size and assume that
+ // this width can be stored. If so, check to see whether we will end up
+ // actually reducing the number of stores used.
+ unsigned Bytes = unsigned(End-Start);
+ unsigned NumPointerStores = Bytes/TD.getPointerSize();
+
+ // Assume the remaining bytes if any are done a byte at a time.
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+
+ // If we will reduce the # stores (according to this heuristic), do the
+ // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
+ // etc.
+ return TheStores.size() > NumPointerStores+NumByteStores;
+}
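+
+// Worked example for the heuristic above, assuming a 4-byte pointer size: for
+// a 12-byte range, NumPointerStores = 3 and NumByteStores = 0, so merging four
+// smaller stores is profitable (4 > 3 + 0), while three i32 stores covering
+// the same bytes are left for the code generator (3 > 3 fails).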
+
+
+namespace {
+class MemsetRanges {
+ /// Ranges - A sorted list of the memset ranges. We use std::list here
+ /// because each element is relatively large and expensive to copy.
+ std::list<MemsetRange> Ranges;
+ typedef std::list<MemsetRange>::iterator range_iterator;
+ const TargetData &TD;
+public:
+ MemsetRanges(const TargetData &td) : TD(td) {}
+
+ typedef std::list<MemsetRange>::const_iterator const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+ bool empty() const { return Ranges.empty(); }
+
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
+};
+
+} // end anon namespace
+
+
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
+ range_iterator I = Ranges.begin(), E = Ranges.end();
+
+ while (I != E && Start > I->End)
+ ++I;
+
+ // We now know that I == E, in which case we didn't find anything to merge
+ // with, or that Start <= I->End. If End < I->Start or I == E, then we need
+ // to insert a new range. Handle this now.
+ if (I == E || End < I->Start) {
+ MemsetRange &R = *Ranges.insert(I, MemsetRange());
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
+ return;
+ }
+
+ // This store overlaps with I, add it.
+ I->TheStores.push_back(Inst);
+
+ // At this point, we may have an interval that completely contains our store.
+ // If so, just add it to the interval and return.
+ if (I->Start <= Start && I->End >= End)
+ return;
+
+ // Now we know that Start <= I->End and End >= I->Start, so the new range
+ // overlaps I but is not entirely contained within it.
+
+ // See if the new store extends the start of the range.  In this case, it
+ // couldn't possibly cause it to join the prior range, because otherwise we
+ // would have stopped on *it*.
+ if (Start < I->Start) {
+ I->Start = Start;
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
+ }
+
+ // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+ // is in or right at the end of I), and that End >= I->Start. Extend I out to
+ // End.
+ if (End > I->End) {
+ I->End = End;
+ range_iterator NextI = I;
+ while (++NextI != E && End >= NextI->Start) {
+ // Merge the range in.
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+ if (NextI->End > I->End)
+ I->End = NextI->End;
+ Ranges.erase(NextI);
+ NextI = I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class MemCpyOpt : public FunctionPass {
+ MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
+ const TargetData *TD;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
+ TLI = 0;
+ TD = 0;
+ }
+
+ bool runOnFunction(Function &F);
+
+ private:
+ // This transformation requires dominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+
+ // Helper functions
+ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
+ bool processMemCpy(MemCpyInst *M);
+ bool processMemMove(MemMoveInst *M);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
+ bool iterateOnFunction(Function &F);
+ };
+
+ char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
+/// some other patterns to fold away. In particular, this looks for stores to
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
+
+ // Okay, so we now have a single store with a splatable value.  Scan to find
+ // all subsequent stores of the same value at offsets from the same pointer.
+ // Join these together into ranges, so we can decide whether contiguous blocks
+ // are stored.
+ MemsetRanges Ranges(*TD);
+
+ BasicBlock::iterator BI = StartInst;
+ for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memset(A, ...); strlen(A).
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
+
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
+
+ // Check to see if this store writes the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
+ }
+
+ // If we have no ranges, then we just had a single store with nothing that
+ // could be merged in. This is a very common case of course.
+ if (Ranges.empty())
+ return 0;
+
+ // If we had at least one store that could be merged in, add the starting
+ // store as well.  As a small compile-time optimization, we avoid doing this
+ // unless there is at least something interesting to merge with.
+ Ranges.addInst(0, StartInst);
+
+ // If we create any memsets, we put them right before the first instruction
+ // that isn't part of the memset block.  This ensures that the memset is
+ // dominated by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
+ // Now that we have full information about ranges, loop over the ranges and
+ // emit memset's for anything big enough to be worthwhile.
+ Instruction *AMemSet = 0;
+ for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+ I != E; ++I) {
+ const MemsetRange &Range = *I;
+
+ if (Range.TheStores.size() == 1) continue;
+
+ // If it is profitable to lower this range to memset, do so now.
+ if (!Range.isProfitableToUseMemset(*TD))
+ continue;
+
+ // Otherwise, we do want to transform this! Create a new memset.
+ // Get the starting pointer of the block.
+ StartPtr = Range.StartPtr;
+
+ // Determine alignment
+ unsigned Alignment = Range.Alignment;
+ if (Alignment == 0) {
+ const Type *EltType =
+ cast<PointerType>(StartPtr->getType())->getElementType();
+ Alignment = TD->getABITypeAlignment(EltType);
+ }
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
+ DEBUG(dbgs() << "Replace stores:\n";
+ for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+
+ // Zap all the stores.
+ for (SmallVector<Instruction*, 16>::const_iterator
+ SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
+ (*SI)->eraseFromParent();
+ }
+ ++NumMemSetInfer;
+ }
+
+ return AMemSet;
+}
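+
+// Illustrative sketch of the merging above (pointer names hypothetical): a run
+// of neighboring byte stores such as
+//
+//   store i8 0, i8* %p
+//   %p1 = getelementptr i8* %p, i64 1
+//   store i8 0, i8* %p1
+//   %p2 = getelementptr i8* %p, i64 2
+//   store i8 0, i8* %p2
+//   %p3 = getelementptr i8* %p, i64 3
+//   store i8 0, i8* %p3
+//
+// is collected into a single [0, 4) range and, if judged profitable for the
+// target, replaced by one call:
+//
+//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i32 1, i1 false)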
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
+ MemDepResult ldep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+
+ if (C) {
+ // Check that nothing touches the dest of the "copy" between
+ // the call and the store.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ C = 0;
+ break;
+ }
+ }
+ }
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'd a
+ // byte at a time, like "0" or "-1" of any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
+ // The general transformation to keep in mind is
+ //
+ // call @func(..., src, ...)
+ // memcpy(dest, src, ...)
+ //
+ // ->
+ //
+ // memcpy(dest, src, ...)
+ // call @func(..., dest, ...)
+ //
+ // Since moving the memcpy is technically awkward, we additionally check that
+ // src only holds uninitialized values at the moment of the call, meaning that
+ // the memcpy can be discarded rather than moved.
+
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ CallSite CS(C);
+
+ // Require that src be an alloca. This simplifies the reasoning considerably.
+ AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ if (!srcAlloca)
+ return false;
+
+ // Check that all of src is copied to dest.
+ if (TD == 0) return false;
+
+ ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ if (!srcArraySize)
+ return false;
+
+ uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
+
+ if (cpyLen < srcSize)
+ return false;
+
+ // Check that accessing the first srcSize bytes of dest will not cause a
+ // trap. Otherwise the transform is invalid since it might cause a trap
+ // to occur earlier than it otherwise would.
+ if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
+ // The destination is an alloca. Check it is larger than srcSize.
+ ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ if (!destArraySize)
+ return false;
+
+ uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
+
+ if (destSize < srcSize)
+ return false;
+ } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
+
+ const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
+
+ if (destSize < srcSize)
+ return false;
+ } else {
+ return false;
+ }
+
+ // Check that src is not accessed except via the call and the memcpy. This
+ // guarantees that it holds only undefined values when passed in (so the final
+ // memcpy can be dropped), that it is not read or written between the call and
+ // the memcpy, and that writing beyond the end of it is undefined.
+ SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+ srcAlloca->use_end());
+ while (!srcUseList.empty()) {
+ User *UI = srcUseList.pop_back_val();
+
+ if (isa<BitCastInst>(UI)) {
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
+ } else if (UI != C && UI != cpy) {
+ return false;
+ }
+ }
+
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
+ return false;
+
+ // In addition to knowing that the call does not access src in some
+ // unexpected manner, for example via a global, which we deduce from
+ // the use analysis, we also need to know that it does not sneakily
+ // access dest. We rely on AA to figure this out for us.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
+ return false;
+
+ // All the checks have passed, so do the transformation.
+ bool changedArgument = false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+ if (cpySrc->getType() != cpyDest->getType())
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() == cpyDest->getType())
+ CS.setArgument(i, cpyDest);
+ else
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ }
+
+ if (!changedArgument)
+ return false;
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MD->removeInstruction(C);
+
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
+ ++NumMemCpyInstr;
+
+ return true;
+}
+
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+ // We can only transform memcpys where the dest of one is the source of the
+ // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
+ return false;
+
+ // If the dep instruction is reading from our current input, then it is a noop
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
+ return false;
+
+ // Second, the length of the memcpys must be the same, or the preceding one
+ // must be larger than the following one.
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ return false;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
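+
+// Illustrative sketch of the memcpy-memcpy forwarding above (sizes and
+// alignments hypothetical):
+//
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 32, i32 4, i1 false)
+//   ...
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %b, i64 32, i32 4, i1 false)
+//
+// The second copy is rewritten to read directly from %a (or turned into a
+// memmove if %c may alias %a), which exposes the first copy to dead store
+// elimination once %b has no other uses.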
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return false;
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+ // There are two possible optimizations we can do for memcpy:
+ //   a) the memcpy-memcpy xform, which exposes redundancy for DSE.
+ //   b) the call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
+ return false;
+
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
+ // See if the pointers alias.
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
+ return false;
+
+ DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+
+ // If not, then we know we can transform this.
+ Module *Mod = M->getParent()->getParent()->getParent();
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys));
+
+ // MemDep may have overly conservative information about this instruction;
+ // just conservatively flush it from the cache.
+ MD->removeInstruction(M);
+
+ ++NumMoveToCpy;
+ return true;
+}
+
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
+ // a memcpy, see if we can use the memcpy's source as the byval argument
+ // instead of its result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+ // The length of the memcpy must be greater than or equal to the size of the
+ // byval.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+ // Get the alignment of the byval. If the call doesn't specify the alignment,
+ // then it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0) return false;
+
+ // If it is greater than the alignment of the memcpy, then we check to see if
+ // we can force the source of the memcpy to the alignment we need.  If we
+ // fail, we bail out.
+ if (MDep->getAlignment() < ByValAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the call into foo(*b).
+ //
+ // NOTE: This is conservative; it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+ bool MadeChange = false;
+
+ // Walk all instructions in the function.
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ // Avoid invalidating the iterator.
+ Instruction *I = BI++;
+
+ bool RepeatInstruction = false;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // If we don't have at least memset and memcpy, there is little point in doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
+ while (1) {
+ if (!iterateOnFunction(F))
+ break;
+ MadeChange = true;
+ }
+
+ MD = 0;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 000000000000..ee132d3be4f5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc. Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MapVector - An associative container with fast insertion-order
+ /// (deterministic) iteration over its elements, plus the special
+ /// blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map - Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ /// Vector - Keys and values.
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](KeyT Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector.back().second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(InsertPair);
+ return std::make_pair(llvm::prior(Vector.end()), true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(KeyT Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// blot - This is similar to erase, but instead of removing the element
+ /// from the vector, it just zeros out the key in the vector. This leaves
+ /// iterators intact, but clients must be prepared for zeroed-out keys when
+ /// iterating.
+ void blot(KeyT Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// ARC Utilities.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// InstructionClass - A simple classification for instructions.
+ enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+ // Pointers to static or stack storage are not reference-counted pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments are not reference-counted.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types, and not function pointers.
+ const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ return false;
+ // Conservatively assume anything else is a potential use.
+ return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialUse(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ const Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ const Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// GetInstructionClass - Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer.  Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case 0: break;
+ case Intrinsic::bswap: case Intrinsic::ctpop:
+ case Intrinsic::ctlz: case Intrinsic::cttz:
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ for (Function::const_arg_iterator AI = F->arg_begin(),
+ AE = F->arg_end(); AI != AE; ++AI)
+ if (IsPotentialUse(AI))
+ return IC_User;
+ return IC_None;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialUse(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialUse(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return IC_User;
+}
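+
+// For example, a direct call such as "call i8* @objc_retain(i8* %x)" is
+// classified here via GetFunctionClass (IC_Retain), while an indirect call
+// through a function pointer has no callee Function and is conservatively
+// treated as IC_CallOrUser.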
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
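+
+// For example (illustrative IR), erasing the retain in
+//   %y = call i8* @objc_retain(i8* %x)
+//   store i8* %y, i8** %slot
+// first rewrites the store to use %x directly and then deletes the call; if
+// the retain's result had no users, the argument's now-trivially-dead
+// computation is cleaned up instead.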
+
+/// GetUnderlyingObjCPtr - This is a wrapper around GetUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
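+
+// For example (illustrative IR), given
+//   %c = bitcast %struct.Foo* %obj to i8*
+//   %r = call i8* @objc_retain(i8* %c)
+// StripPointerCastsAndObjCCalls(%r) returns %obj, since both the forwarding
+// call and the no-op cast are looked through.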
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+/// IsObjCIdentifiedObject - This is similar to AliasAnalysis's
+/// isObjCIdentifiedObject, except that it uses special knowledge of ObjC
+/// conventions, such as the runtime-internal global variables recognized below.
+static bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// FindSingleUseIdentifiedObject - This is similar to
+/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
+/// with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object with multiple uses, but all of them
+ // are trivial, we can still consider this to be a single-use value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer");
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+ /// ObjCARCAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses knowledge of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // We have nothing to do. Just chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case IC_NoopCast:
+ return DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_RetainBlock:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ return NoModRef;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+//===----------------------------------------------------------------------===//
+// ARC expansion.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Scalar.h"
+
+namespace {
+ /// ObjCARCExpand - Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted here. We'll redo them in a later pass.
+ Changed = true;
+ Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+ /// uses many of the same techniques, except it uses special ObjC-specific
+ /// reasoning about pointer relationships.
+ class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ // Do not implement.
+ void operator=(const ProvenanceAnalysis &);
+ ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+ public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+ };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition()) {
+ if (related(A->getTrueValue(), SB->getTrueValue()))
+ return true;
+ if (related(A->getFalseValue(), SB->getFalseValue()))
+ return true;
+ return false;
+ }
+
+ // Check both arms of the Select node individually.
+ if (related(A->getTrueValue(), B))
+ return true;
+ if (related(A->getFalseValue(), B))
+ return true;
+
+ // The arms both checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
+static bool isStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointer is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ if (BIsIdentified) {
+ // If both pointers have provenance, they can be directly compared.
+ if (A != B)
+ return false;
+ } else {
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
+ }
+ } else {
+ if (BIsIdentified && isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
+
+namespace {
+ // Sequence - A sequence of states that a pointer may go through in which an
+ // objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x)
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
+ S_Use, ///< any use of x
+ S_Stop, ///< like S_Release, but code motion is stopped
+ S_Release, ///< objc_release(x)
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release
+ };
+}
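+
+// In top-down order, a pointer typically walks this sequence: an objc_retain
+// puts it in S_Retain, a call that could decrement its reference count moves
+// it to S_CanRelease, a subsequent use moves it to S_Use, and a matching
+// objc_release (S_Release, or S_MovableRelease when tagged with
+// clang.imprecise_release) completes the retain-decrement-use-release
+// sequence described below.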
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ // Note that we can't merge S_CanRelease and S_Use.
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
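+
+// For example, merging S_Retain with S_CanRelease in the top-down direction
+// yields S_CanRelease (the state further along in the sequence), while merging
+// S_Release with S_MovableRelease bottom-up yields the more conservative
+// S_Release; any merge involving S_None collapses to S_None.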
+
+namespace {
+ /// RRInfo - Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// KnownIncremented - After an objc_retain, the reference count of the
+ /// referenced object is known to be positive. Similarly, before an
+ /// objc_release, the reference count of the referenced object is known to
+ /// be positive. If there are retain-release pairs in code regions where the
+ /// retain count is known to be positive, they can be eliminated, regardless
+ /// of any side effects between them.
+ bool KnownIncremented;
+
+ /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+ /// opposed to objc_retain calls).
+ bool IsRetainBlock;
+
+ /// IsTailCallRelease - True if the objc_release calls are all marked
+ /// with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+ /// a clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// Calls - For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// ReverseInsertPts - The set of optimal insert positions for
+ /// moving calls in the opposite sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownIncremented = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// PtrState - This class summarizes several per-pointer runtime properties
+ /// which are propagated through the flow graph.
+ class PtrState {
+ /// RefCount - The known minimum number of reference count increments.
+ unsigned RefCount;
+
+ /// Seq - The current position in the sequence.
+ Sequence Seq;
+
+ public:
+ /// RRI - Unidirectional information about the current sequence.
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : RefCount(0), Seq(S_None) {}
+
+ void IncrementRefCount() {
+ if (RefCount != UINT_MAX) ++RefCount;
+ }
+
+ void DecrementRefCount() {
+ if (RefCount != 0) --RefCount;
+ }
+
+ void ClearRefCount() {
+ RefCount = 0;
+ }
+
+ bool IsKnownIncremented() const {
+ return RefCount > 0;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ void SetSeqToRelease(MDNode *M) {
+ if (Seq == S_None || Seq == S_Use) {
+ Seq = M ? S_MovableRelease : S_Release;
+ RRI.ReleaseMetadata = M;
+ } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
+ Seq = S_Release;
+ RRI.ReleaseMetadata = 0;
+ }
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ Seq = S_None;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ RefCount = std::min(RefCount, Other.RefCount);
+
+ // We can't merge a plain objc_retain with an objc_retainBlock.
+ if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+ Seq = S_None;
+
+ if (Seq == S_None) {
+ RRI.clear();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+ RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
+ Other.RRI.ReverseInsertPts.end());
+ }
+}
+
+namespace {
+ /// BBState - Per-BasicBlock state.
+ class BBState {
+ /// TopDownPathCount - The number of unique control paths from the entry
+ /// which can reach this block.
+ unsigned TopDownPathCount;
+
+ /// BottomUpPathCount - The number of unique control paths to exits
+ /// from this block.
+ unsigned BottomUpPathCount;
+
+ /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// PerPtrTopDown - The top-down traversal uses this to record information
+ /// known about a pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// PerPtrBottomUp - The bottom-up traversal uses this to record information
+ /// known about a pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// SetAsEntry - Mark this block as being an entry block, which has one
+ /// path from the entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// SetAsExit - Mark this block as being an exit block, which has one
+ /// path to an exit by definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// GetAllPathCount - Return the number of possible unique paths from an
+ /// entry to an exit which pass through this block. This is only valid
+ /// after both the top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ return TopDownPathCount * BottomUpPathCount;
+ }
+ };
+}
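+
+// For example, in a simple if/else diamond each arm block has
+// TopDownPathCount == 1 and BottomUpPathCount == 1, while the join block has
+// TopDownPathCount == 2, so GetAllPathCount() for the join block is 2.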
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+ /// ObjCARCOpt - The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// RetainFunc, ReleaseFunc, etc. - Declarations for objc_retain,
+ /// objc_retainBlock, objc_retainAutoreleasedReturnValue, and objc_release.
+ Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+ /// RetainRVCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+ *RetainCallee, *AutoreleaseCallee;
+
+ /// UsedInThisFunction - Flags which determine whether each of the
+ /// interesting runtime functions is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+ /// metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attributes);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return AutoreleaseCallee;
+}
+
+/// CanAlterRefCount - Test whether the given instruction can result in a
+/// reference count modification (positive or negative) for the pointer's
+/// object.
+static bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
+
+/// CanUse - Test whether the given instruction can "use" the given pointer's
+/// object in a way that requires the reference count to be positive.
+static bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialUse(ICI->getOperand(1)))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
+
+/// CanInterruptRV - Test whether the given instruction can autorelease
+/// any pointer or cause an autoreleasepool pop.
+static bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+namespace {
+ /// DependenceKind - There are several kinds of dependence-like concepts in
+ /// use here.
+ enum DependenceKind {
+ NeedsPositiveRetainCount,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+ };
+}
+
+/// Depends - Test if there can be dependencies on Inst through Arg. This
+/// function only tests dependencies relevant for removing pairs of calls.
+static bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+ break;
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ break;
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+ return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInstructions.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInstructions.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
+
+static bool isNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
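+
+// For example (illustrative IR), "bitcast %struct.Foo* %x to i8*" and
+// "getelementptr inbounds i8* %x, i64 0" are both no-op instructions here:
+// neither changes the pointer value, they only exist to satisfy the type
+// system.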
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ CallSite CS(GetObjCArg(Retain));
+ Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue call.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
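+
+// For example (illustrative IR, using a hypothetical callee @returnsRetainable):
+//   %call = call i8* @returnsRetainable()
+//   %0 = call i8* @objc_retain(i8* %call)
+// becomes
+//   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call)
+// since the retain's operand is produced by the immediately preceding call.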
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value. Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call.
+ Value *Arg = GetObjCArg(RetainRV);
+ CallSite CS(Arg);
+ if (Instruction *Call = CS.getInstruction())
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && isNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it into a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+ return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ }
+
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(AutoreleaseRV)->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions,
+ // however these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0)) ||
+ isNullOrUndef(CI->getArgOperand(1))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue;
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst);
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>()));
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class;
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (isNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue;
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (isNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) {
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ const Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!isNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ }
+ // Erase the original call.
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+}
+
+/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
+/// control flow, or other CFG structures where moving code across the edge
+/// would result in it being executed more often than it originally was.
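+///
+/// A hedged sketch of the kind of hazard this guards against (hypothetical
+/// CFG, for exposition only):
+///
+///   latch:                      ; top-down state for %p here is S_Use
+///     ...
+///     br i1 %c, label %header, label %exit
+///
+/// If the bottom-up state for %p in %header (reached via the backedge) is
+/// earlier in the sequence, e.g. S_None, moving the paired release across
+/// that edge could make it execute once per iteration instead of once, so
+/// the sequence progress for %p is cleared.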
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ case S_CanRelease:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ succ_const_iterator SI(TI), SE(TI, false);
+ if (SI == SE)
+ MyStates.SetAsExit();
+ else
+ do {
+ const BasicBlock *Succ = *SI++;
+ if (Succ == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromSucc(I->second);
+ while (SI != SE) {
+ Succ = *SI++;
+ if (Succ != BB) {
+ I = BBStates.find(Succ);
+ if (I != BBStates.end())
+ MyStates.MergeSucc(I->second);
+ }
+ }
+ break;
+ } while (SI != SE);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // Check for two releases in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.RRI.clear();
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible retains and releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a retain (we're going bottom-up here).
+ S.DecrementRefCount();
+
+ // Check for a release.
+ if (!IsRetain(Class) && Class != IC_RetainBlock)
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block.
+ const_pred_iterator PI(BB), PE(BB, false);
+ if (PI == PE)
+ MyStates.SetAsEntry();
+ else
+ do {
+ const BasicBlock *Pred = *PI++;
+ if (Pred == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromPred(I->second);
+ while (PI != PE) {
+ Pred = *PI++;
+ if (Pred != BB) {
+ I = BBStates.find(Pred);
+ if (I != BBStates.end())
+ MyStates.MergePred(I->second);
+ }
+ }
+ break;
+ } while (PI != PE);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // Check for two retains in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ S.SetSeq(S_Retain);
+ S.RRI.clear();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (!IsRetain(Class) && Class != IC_RetainBlock &&
+ CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a release (we're going top-down here).
+ S.DecrementRefCount();
+
+ // Check how a possible release affects the current sequence.
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Use:
+ case S_Retain:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+ }
+
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // magically know that loops will be well behaved, i.e. they won't repeatedly
+ // call retain on a single pointer without doing a release.
+ bool BottomUpNestingDetected = false;
+ SmallVector<BasicBlock *, 8> PostOrder;
+ for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ BasicBlock *BB = *I;
+ PostOrder.push_back(BB);
+
+ BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+ }
+
+ // Iterate through the post-order in reverse order, achieving a
+ // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
+ // class here because it works by computing its own full postorder iteration,
+ // recording the sequence, and playing it back in reverse. Since we're already
+ // doing a full iteration above, we can just record the sequence manually and
+ // avoid the cost of having ReversePostOrderTraversal compute it.
+ bool TopDownNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
+ RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
+ TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+
+ return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ const Type *ArgTy = Arg->getType();
+ const Type *ParamTy =
+ (RetainRVFunc ? RetainRVFunc :
+ RetainFunc ? RetainFunc :
+ RetainBlockFunc)->arg_begin()->getType();
+
+ // Insert the new retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(RetainsToMove.IsRetainBlock ?
+ RetainBlockFunc : RetainFunc,
+ MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ if (!RetainsToMove.IsRetainBlock)
+ Call->setTailCall();
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *LastUse = *PI;
+ Instruction *InsertPts[] = { 0, 0, 0 };
+ if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
+ // We can't insert code immediately after an invoke instruction, so
+ // insert code at the beginning of both successor blocks instead.
+ // The invoke's return value isn't available in the unwind block,
+ // but our releases will never depend on it, because they must be
+ // paired with retains from before the invoke.
+ InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+ InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Insert code immediately after the last use.
+ InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+ }
+
+ for (Instruction **I = InsertPts; *I; ++I) {
+ Instruction *InsertPt = *I;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+ }
+ }
+
+ // Delete the original retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ }
+}
+
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool AnyPairsCompletelyEliminated = false;
+ RRInfo RetainsToMove;
+ RRInfo ReleasesToMove;
+ SmallVector<Instruction *, 4> NewRetains;
+ SmallVector<Instruction *, 4> NewReleases;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end(); I != E; ) {
+ Value *V = (I++)->first;
+ if (!V) continue; // blotted
+
+ Instruction *Retain = cast<Instruction>(V);
+ Value *Arg = GetObjCArg(Retain);
+
+ // If the object being released is in static or stack storage, we know it's
+ // not being managed by ObjC reference counting, so we can delete pairs
+ // regardless of what possible decrements or uses lie between them.
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+ // If a pair happens in a region where it is known that the reference count
+ // is already incremented, we can similarly ignore possible decrements.
+ bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+ // Connect the dots between the bottom-up-collected RetainsToMove and
+ // top-down-collected ReleasesToMove to form sets of related calls.
+ // This is an iterative process so that we connect multiple releases
+ // to multiple retains if needed.
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ bool FirstRetain = true;
+ NewRetains.push_back(Retain);
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ goto next_retain;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ goto next_retain;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Merge the IsRetainBlock values.
+ if (FirstRetain) {
+ RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+ FirstRetain = false;
+ } else if (RetainsToMove.IsRetainBlock !=
+ NewReleaseRetainRRI.IsRetainBlock)
+ // It's not possible to merge the sequences if one uses
+ // objc_retain and the other uses objc_retainBlock.
+ goto next_retain;
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+ RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ NewDelta += PathCount;
+ NewCount += PathCount;
+ }
+ }
+ NewRetains.push_back(NewReleaseRetain);
+ }
+ }
+ }
+ NewReleases.clear();
+ if (NewRetains.empty()) break;
+ }
+
+ // If the pointer is known incremented, we can safely delete the pair
+ // regardless of what's between them.
+ if (KnownIncrementedTD || KnownIncrementedBU) {
+ RetainsToMove.ReverseInsertPts.clear();
+ ReleasesToMove.ReverseInsertPts.clear();
+ NewCount = 0;
+ }
+
+ // Determine whether the original call points are balanced in the retain and
+ // release calls through the program. If not, conservatively don't touch
+ // them.
+ // TODO: It's theoretically possible to do code motion in this case, as
+ // long as the existing imbalances are maintained.
+ if (OldDelta != 0)
+ goto next_retain;
+
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
+
+ // Ok, everything checks out and we're all set. Let's move some code!
+ Changed = true;
+ AnyPairsCompletelyEliminated = NewCount == 0;
+ NumRRs += OldCount - NewCount;
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+
+ next_retain:
+ NewReleases.clear();
+ NewRetains.clear();
+ RetainsToMove.clear();
+ ReleasesToMove.clear();
+ }
+
+ // Now that we're done moving everything, we can delete the newly dead
+ // instructions, as we no longer need them as insert points.
+ while (!DeadInsts.empty())
+ EraseInstruction(DeadInsts.pop_back_val());
+
+ return AnyPairsCompletelyEliminated;
+}
+
+/// OptimizeWeakCalls - Weak pointer optimizations.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE (redundant load elimination) and S2L
+ // (store-to-load forwarding) optimizations. We can't use memdep itself
+ // because it uses AliasAnalysis and we need to do provenance queries
+ // instead.
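+ // As an illustrative example (hypothetical IR):
+ //
+ //   %x = call i8* @objc_loadWeak(i8** %p)
+ //   ; ... nothing here can modify the weak slot ...
+ //   %y = call i8* @objc_loadWeak(i8** %p)
+ //
+ // The second load is redundant and can reuse %x. Similarly, a load that
+ // follows a call to objc_storeWeak(i8** %p, i8* %v) with no clobber in
+ // between can be replaced by %v.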
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would want caching. A better approach would be to
+ // use the technique that EarlyCSE uses.
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
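+ // As an illustrative case (hypothetical IR), a weak slot whose only users
+ // are the weak entry points can be removed outright:
+ //
+ //   %w = alloca i8*
+ //   call i8* @objc_initWeak(i8** %w, i8* %a)
+ //   call void @objc_destroyWeak(i8** %w)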
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ if (!UserInst->use_empty())
+ UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+}
+
+/// OptimizeSequences - Identify program paths which execute sequences of
+/// retains and releases which can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis. These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// BBStates - This is used during the traversal of the function to track
+ /// the states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
+ return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ if (!Ret) continue;
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ if (!Autorelease)
+ goto next_block;
+ InstructionClass AutoreleaseClass =
+ GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ goto next_block;
+ if (GetObjCArg(Autorelease) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference
+ // count between the autorelease and the retain.
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Convert the autorelease to an autoreleaseRV, since it's
+ // returning the value.
+ if (AutoreleaseClass == IC_Autorelease) {
+ Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+ AutoreleaseClass = IC_AutoreleaseRV;
+ }
+
+ // Check that there is nothing that can affect the reference
+ // count between the retain and the call.
+ FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ goto next_block;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ goto next_block;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ next_block:
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+
+ // Identify the declarations for objc_retain and friends.
+ RetainFunc = M.getFunction("objc_retain");
+ RetainBlockFunc = M.getFunction("objc_retainBlock");
+ RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+ ReleaseFunc = M.getFunction("objc_release");
+
+ // Intuitively, objc_retain and others are nocapture; however, in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computes UsedInThisFunction.
+ OptimizeIndividualCalls(F);
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction &
+ ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+ /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+ /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *StoreStrongCallee,
+ *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
+
+ /// RetainRVMarker - The inline asm string to insert between calls and
+ /// RetainRV calls to make the optimization work on targets which need it.
+ const MDString *RetainRVMarker;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+ if (!StoreStrongCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ std::vector<Type *> Params;
+ Params.push_back(I8XX);
+ Params.push_back(I8X);
+
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Attributes.addAttr(1, Attribute::NoCapture);
+
+ StoreStrongCallee =
+ M->getOrInsertFunction(
+ "objc_storeStrong",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+ if (!RetainAutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseCallee =
+ M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+ }
+ return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+ if (!RetainAutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Merge an autorelease with a retain into a fused
+/// call.
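+///
+/// As an illustrative sketch (hypothetical IR):
+///
+///   %1 = call i8* @objc_retain(i8* %x)
+///   %2 = call i8* @objc_autorelease(i8* %1)
+///
+/// becomes, when nothing in between can change the reference count:
+///
+///   %1 = call i8* @objc_retainAutorelease(i8* %x)
+///
+/// (or objc_retainAutoreleaseReturnValue for the AutoreleaseRV flavor).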
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the autorelease
+ // (such as an autorelease_pop) which may change the count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear();
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ if (Class == IC_AutoreleaseRV)
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ EraseInstruction(Autorelease);
+ return true;
+}
+
+/// ContractRelease - Attempt to merge an objc_release with a store, load, and
+/// objc_retain to form an objc_storeStrong. This can be a little tricky because
+/// the instructions don't always appear in order, and there may be unrelated
+/// intervening instructions.
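+///
+/// The shape being matched looks roughly like this (illustrative IR; the
+/// instructions need not be adjacent):
+///
+///   %old = load i8** %ptr
+///   %new = call i8* @objc_retain(i8* %x)
+///   store i8* %new, i8** %ptr
+///   call void @objc_release(i8* %old)
+///
+/// which is contracted into a single call:
+///
+///   call void @objc_storeStrong(i8** %ptr, i8* %x)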
+void ObjCARCContract::ContractRelease(Instruction *Release,
+ inst_iterator &Iter) {
+ LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+ if (!Load || Load->isVolatile()) return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB) return;
+
+ // Walk down to find the store.
+ BasicBlock::iterator I = Load, End = BB->end();
+ ++I;
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
+ while (I != End &&
+ (&*I == Release ||
+ IsRetain(GetBasicInstructionClass(I)) ||
+ !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
+ ++I;
+ StoreInst *Store = dyn_cast<StoreInst>(I);
+ if (!Store || Store->isVolatile()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+
+ Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+ // Walk up to find the retain.
+ I = Store;
+ BasicBlock::iterator Begin = BB->begin();
+ while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+ --I;
+ Instruction *Retain = I;
+ if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+ if (GetObjCArg(Retain) != New) return;
+
+ Changed = true;
+ ++NumStoreStrongs;
+
+ LLVMContext &C = Release->getContext();
+ const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ const Type *I8XX = PointerType::getUnqual(I8X);
+
+ Value *Args[] = { Load->getPointerOperand(), New };
+ if (Args[0]->getType() != I8XX)
+ Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+ if (Args[1]->getType() != I8X)
+ Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+ CallInst *StoreStrong =
+ CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+ Args, "", Store);
+ StoreStrong->setDoesNotThrow();
+ StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+ if (&*Iter == Store) ++Iter;
+ Store->eraseFromParent();
+ Release->eraseFromParent();
+ EraseInstruction(Retain);
+ if (Load->use_empty())
+ Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
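+ // For example, some ARM targets use a marker string along the lines of
+ // "mov r7, r7" (illustrative; the actual string is whatever the frontend
+ // records in the clang.arc.retainAutoreleasedReturnValueMarker metadata).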
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ --BBI;
+ while (isNoopInstruction(BBI)) --BBI;
+ if (&*BBI == GetObjCArg(Inst)) {
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ default:
+ continue;
+ }
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // If we're compiling bugpointed code, don't get in trouble.
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+ if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
+ if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ const Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and
+ // iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
new file mode 100644
index 000000000000..e6341ae3071f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -0,0 +1,1123 @@
+//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates commutative expressions in an order that is designed
+// to promote better constant propagation, GCSE, LICM, PRE, etc.
+//
+// For example: 4 + (x + 5) -> x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
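+//
+// For instance (an illustrative sketch), if %a and %b are function arguments
+// and %t is computed inside a loop, "(%t + %a) + %b" is regrouped so that the
+// low-rank operands are combined first, e.g. "(%a + %b) + %t", which lets
+// LICM hoist the loop-invariant subexpression %a + %b.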
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLinear , "Number of insts linearized");
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr trees annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+ struct ValueEntry {
+ unsigned Rank;
+ Value *Op;
+ ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+ };
+ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+ return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
+ }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
+ Module *M = I->getParent()->getParent()->getParent();
+ dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+ << *Ops[0].Op->getType() << '\t';
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ dbgs() << "[ ";
+ WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ dbgs() << ", #" << Ops[i].Rank << "] ";
+ }
+}
+#endif
+
+namespace {
+ class Reassociate : public FunctionPass {
+ DenseMap<BasicBlock*, unsigned> RankMap;
+ DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ SmallVector<WeakVH, 8> RedoInsts;
+ SmallVector<WeakVH, 8> DeadInsts;
+ bool MadeChange;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ private:
+ void BuildRankMap(Function &F);
+ unsigned getRank(Value *V);
+ Value *ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
+ unsigned Idx = 0);
+ Value *OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExpr(BinaryOperator *I);
+ Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+ void ReassociateInst(BasicBlock::iterator &BBI);
+
+ void RemoveDeadBinaryOp(Value *V);
+ };
+}
+
+char Reassociate::ID = 0;
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false)
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+void Reassociate::RemoveDeadBinaryOp(Value *V) {
+ Instruction *Op = dyn_cast<Instruction>(V);
+ if (!Op || !isa<BinaryOperator>(Op))
+ return;
+
+ Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+
+ ValueRankMap.erase(Op);
+ DeadInsts.push_back(Op);
+ RemoveDeadBinaryOp(LHS);
+ RemoveDeadBinaryOp(RHS);
+}
+
+
+static bool isUnmovableInstruction(Instruction *I) {
+ if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::Invoke ||
+ (I->getOpcode() == Instruction::Call &&
+ !isa<DbgInfoIntrinsic>(I)) ||
+ I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::URem ||
+ I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::FRem)
+ return true;
+ return false;
+}
+
+void Reassociate::BuildRankMap(Function &F) {
+ unsigned i = 2;
+
+ // Assign distinct ranks to function arguments
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ ValueRankMap[&*I] = ++i;
+
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ unsigned BBRank = RankMap[BB] = ++i << 16;
+
+ // Walk the basic block, adding precomputed ranks for any instructions that
+ // we cannot move. This ensures that the ranks for these instructions are
+ // all different in the block.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUnmovableInstruction(I))
+ ValueRankMap[&*I] = ++BBRank;
+ }
+}
+
+unsigned Reassociate::getRank(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
+ return 0; // Otherwise it's a global or constant, rank 0.
+ }
+
+ if (unsigned Rank = ValueRankMap[I])
+ return Rank; // Rank already known?
+
+ // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
+ // we can reassociate expressions for code motion! Since we do not recurse
+ // for PHI nodes, we cannot have infinite recursion here, because there
+ // cannot be loops in the value graph that do not go through PHI nodes.
+ unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
+ for (unsigned i = 0, e = I->getNumOperands();
+ i != e && Rank != MaxRank; ++i)
+ Rank = std::max(Rank, getRank(I->getOperand(i)));
+
+ // If this is a not or neg instruction, do not count it for rank. This
+ // assures us that X and ~X will have the same rank.
+ if (!I->getType()->isIntegerTy() ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ ++Rank;
+
+ //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n");
+
+ return ValueRankMap[I] = Rank;
+}
+
+/// isReassociableOp - Return true if V is an instruction of the specified
+/// opcode and if it only has one use.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
+}
+
+/// LowerNegateToMultiply - Replace 0-X with X*-1.
+///
+static Instruction *LowerNegateToMultiply(Instruction *Neg,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ Constant *Cst = Constant::getAllOnesValue(Neg->getType());
+
+ Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ ValueRankMap.erase(Neg);
+ Res->takeName(Neg);
+ Neg->replaceAllUsesWith(Res);
+ Res->setDebugLoc(Neg->getDebugLoc());
+ Neg->eraseFromParent();
+ return Res;
+}
+
+// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
+// Note that if D is also part of the expression tree, we recurse to
+// linearize it as well. Aside from that case, this does not recurse into A, B,
+// or C.
+void Reassociate::LinearizeExpr(BinaryOperator *I) {
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
+ assert(isReassociableOp(LHS, I->getOpcode()) &&
+ isReassociableOp(RHS, I->getOpcode()) &&
+ "Not an expression that needs linearization?");
+
+ DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
+
+ // Move the RHS instruction to live immediately before I, avoiding breaking
+ // dominator properties.
+ RHS->moveBefore(I);
+
+ // Move operands around to do the linearization.
+ I->setOperand(1, RHS->getOperand(0));
+ RHS->setOperand(0, LHS);
+ I->setOperand(0, RHS);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+ LHS->clearSubclassOptionalData();
+ RHS->clearSubclassOptionalData();
+
+ ++NumLinear;
+ MadeChange = true;
+ DEBUG(dbgs() << "Linearized: " << *I << '\n');
+
+ // If D is part of this expression tree, tail recurse.
+ if (isReassociableOp(I->getOperand(1), I->getOpcode()))
+ LinearizeExpr(I);
+}
+
+
+/// LinearizeExprTree - Given an associative binary expression tree, traverse
+/// all of the uses putting it into canonical form. This forces a left-linear
+/// form of the expression (((a+b)+c)+d), and collects information about the
+/// rank of the non-tree operands.
+///
+/// NOTE: This intentionally destroys the expression tree operands (turning
+/// them into undef values) to reduce #uses of the values. This means that the
+/// caller MUST use something like RewriteExprTree to put the values back in.
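+/// For example (hypothetical values), linearizing "((a+b)+c)+d" pushes the
+/// leaves a, b, c and d into Ops (each tagged with its rank) and replaces the
+/// leaf operands in the tree with undef until RewriteExprTree fills them back
+/// in.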
+///
+void Reassociate::LinearizeExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ unsigned Opcode = I->getOpcode();
+
+ // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
+ BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
+ BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+
+ // If this is a multiply expression tree and it contains internal negations,
+ // transform them into multiplies by -1 so they can be reassociated.
+ if (I->getOpcode() == Instruction::Mul) {
+ if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
+ LHSBO = isReassociableOp(LHS, Opcode);
+ }
+ if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
+ RHSBO = isReassociableOp(RHS, Opcode);
+ }
+ }
+
+ if (!LHSBO) {
+ if (!RHSBO) {
+ // Neither the LHS nor the RHS is part of the tree; thus this is a leaf. As
+ // such, just remember these operands and their ranks.
+ Ops.push_back(ValueEntry(getRank(LHS), LHS));
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the leaves out.
+ I->setOperand(0, UndefValue::get(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
+ return;
+ }
+
+ // Turn X+(Y+Z) -> (Y+Z)+X
+ std::swap(LHSBO, RHSBO);
+ std::swap(LHS, RHS);
+ bool Success = !I->swapOperands();
+ assert(Success && "swapOperands failed");
+ Success = false;
+ MadeChange = true;
+ } else if (RHSBO) {
+ // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
+ // part of the expression tree.
+ LinearizeExpr(I);
+ LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
+ RHS = I->getOperand(1);
+ RHSBO = 0;
+ }
+
+ // Okay, now we know that the LHS is a nested expression and that the RHS is
+ // not. Perform reassociation.
+ assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
+
+ // Move LHS right before I to make sure that the tree expression dominates all
+ // values.
+ LHSBO->moveBefore(I);
+
+ // Linearize the expression tree on the LHS.
+ LinearizeExprTree(LHSBO, Ops);
+
+ // Remember the RHS operand and its rank.
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the RHS leaf out.
+ I->setOperand(1, UndefValue::get(I->getType()));
+}
+
+// RewriteExprTree - Now that the operands for this expression tree are
+// linearized and optimized, emit them in-order. This function is written to be
+// tail recursive.
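+// For example (hypothetical values), given the left-linear tree (x+y)+z and
+// the sorted list Ops = [a, b, c] (highest rank first), the root's RHS becomes
+// Ops[0] and the recursion writes Ops[1] and Ops[2] into the inner node,
+// yielding (b+c)+a.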
+void Reassociate::RewriteExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops,
+ unsigned i) {
+ if (i+2 == Ops.size()) {
+ if (I->getOperand(0) != Ops[i].Op ||
+ I->getOperand(1) != Ops[i+1].Op) {
+ Value *OldLHS = I->getOperand(0);
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(0, Ops[i].Op);
+ I->setOperand(1, Ops[i+1].Op);
+
+ // Clear all the optional flags, which may not hold after the
+ // reassociation if the expression involved more than just this operation.
+ if (Ops.size() != 2)
+ I->clearSubclassOptionalData();
+
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+
+ // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)),
+ // delete the extra, now-dead, nodes.
+ RemoveDeadBinaryOp(OldLHS);
+ }
+ return;
+ }
+ assert(i+2 < Ops.size() && "Ops index out of range!");
+
+ if (I->getOperand(1) != Ops[i].Op) {
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(1, Ops[i].Op);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ }
+
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ assert(LHS->getOpcode() == I->getOpcode() &&
+ "Improper expression tree!");
+
+ // Compactify the tree instructions together with each other to guarantee
+ // that the expression tree is dominated by all of Ops.
+ LHS->moveBefore(I);
+ RewriteExprTree(LHS, Ops, i+1);
+}
+
+
+
+// NegateValue - Insert instructions before the instruction pointed to by BI
+// that compute the negated version of the specified value. The negated value
+// is returned, and BI is left pointing at the instruction that should be
+// processed next by the reassociation pass.
+//
+static Value *NegateValue(Value *V, Instruction *BI) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getNeg(C);
+
+ // We are trying to expose opportunity for reassociation. One of the things
+ // that we want to do to achieve this is to push a negation as deep into an
+ // expression chain as possible, to expose the add instructions. In practice,
+ // this means that we turn this:
+ // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
+ // so that a later expression such as Y = 12+X can be reassociated with the
+ // -12 to eliminate the constants. We assume that instcombine will clean up
+ // the mess later if we introduce tons of unnecessary negation instructions.
+ //
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
+ // Okay, we need to materialize a negated version of V with an instruction.
+ // Scan the use lists of V to see if we have one already.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
+
+ // We found one! Now we have to make sure that the definition dominates
+ // this use. We do this by moving it to the entry block (if it is a
+ // non-instruction value) or right after the definition. These negates will
+ // be zapped by reassociate later, so we don't need much finesse here.
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
+
+ // Verify that the negate is in this function, V might be a constant expr.
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+ continue;
+
+ BasicBlock::iterator InsertPt;
+ if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
+ InsertPt = II->getNormalDest()->begin();
+ } else {
+ InsertPt = InstInput;
+ ++InsertPt;
+ }
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ } else {
+ InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
+ }
+ TheNeg->moveBefore(InsertPt);
+ return TheNeg;
+ }
+
+ // Insert a 'neg' instruction that subtracts the value from zero to get the
+ // negation.
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+}
+
+/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
+/// X-Y into (X + -Y).
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
+ // If this is a negation, we can't split it up!
+ if (BinaryOperator::isNeg(Sub))
+ return false;
+
+ // Don't bother to break this up unless either operand is a reassociable add
+ // or subtract, or the subtract's only user is one.
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ return true;
+ if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ return true;
+ if (Sub->hasOneUse() &&
+ (isReassociableOp(Sub->use_back(), Instruction::Add) ||
+ isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ return true;
+
+ return false;
+}
+
+/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
+/// only used by an add, transform this into (X+(0-Y)) to promote better
+/// reassociation.
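+/// For example (hypothetical values):
+///   %s = sub i32 %x, %y
+/// becomes
+///   %y.neg = sub i32 0, %y
+///   %s     = add i32 %x, %y.neg
+/// so the resulting add can be reassociated with neighbouring adds.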
+static Instruction *BreakUpSubtract(Instruction *Sub,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // Convert a subtract into an add and a neg instruction. This allows sub
+ // instructions to be commuted with other add instructions.
+ //
+ // Calculate the negative value of Operand 1 of the sub instruction,
+ // and set it as the RHS of the add instruction we just made.
+ //
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Instruction *New =
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ New->takeName(Sub);
+
+ // Everyone now refers to the add instruction.
+ ValueRankMap.erase(Sub);
+ Sub->replaceAllUsesWith(New);
+ New->setDebugLoc(Sub->getDebugLoc());
+ Sub->eraseFromParent();
+
+ DEBUG(dbgs() << "Negated: " << *New << '\n');
+ return New;
+}
+
+/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
+/// by one, change this into a multiply by a constant to assist with further
+/// reassociation.
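+/// For example (hypothetical values), "%t = shl i32 %x, 3" becomes
+/// "%t = mul i32 %x, 8" (1 shifted left by 3), so the multiply can take part
+/// in reassociation with neighbouring multiplies.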
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn it into a multiply.
+ if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
+ (Shl->hasOneUse() &&
+ (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
+ isReassociableOp(Shl->use_back(), Instruction::Add)))) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ Instruction *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ ValueRankMap.erase(Shl);
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
+ Shl->eraseFromParent();
+ return Mul;
+ }
+ return 0;
+}
+
+// Scan backwards and forwards among values with the same rank as element i to
+// see if X exists. If X does not exist, return i. This is useful when
+// scanning for 'x' when we see '-x' because they both get the same rank.
+static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
+ Value *X) {
+ unsigned XRank = Ops[i].Rank;
+ unsigned e = Ops.size();
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ if (Ops[j].Op == X)
+ return j;
+ // Scan backwards.
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ if (Ops[j].Op == X)
+ return j;
+ return i;
+}
+
+/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// and returning the result. Insert the tree before I.
+static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
+ if (Ops.size() == 1) return Ops.back();
+
+ Value *V1 = Ops.back();
+ Ops.pop_back();
+ Value *V2 = EmitAddTreeOfValues(I, Ops);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+}
+
+/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// remove Factor from the tree and return the new tree.
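+/// For example (hypothetical values), if V computes a*b*c and Factor is b, the
+/// tree is rewritten to compute a*c and that value is returned; if Factor is a
+/// constant and the tree contains its negation instead, that entry is removed
+/// and the rebuilt tree is negated.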
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) return 0;
+
+ SmallVector<ValueEntry, 8> Factors;
+ LinearizeExprTree(BO, Factors);
+
+ bool FoundFactor = false;
+ bool NeedsNegate = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ if (Factors[i].Op == Factor) {
+ FoundFactor = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+
+ // If this is a negative version of this factor, remove it.
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
+ if (FC1->getValue() == -FC2->getValue()) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ }
+
+ if (!FoundFactor) {
+ // Make sure to restore the operands to the expression tree.
+ RewriteExprTree(BO, Factors);
+ return 0;
+ }
+
+ BasicBlock::iterator InsertPt = BO; ++InsertPt;
+
+ // If this was just a single multiply, remove the multiply and return the only
+ // remaining operand.
+ if (Factors.size() == 1) {
+ ValueRankMap.erase(BO);
+ DeadInsts.push_back(BO);
+ V = Factors[0].Op;
+ } else {
+ RewriteExprTree(BO, Factors);
+ V = BO;
+ }
+
+ if (NeedsNegate)
+ V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+
+ return V;
+}
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
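+/// For example (hypothetical values), if V is (a*b)*c and each multiply has a
+/// single use, the collected factors are c, b and a (right operands first);
+/// any shared or non-multiply value is added as a single factor instead.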
+static void FindSingleUseMultiplyFactors(Value *V,
+ SmallVectorImpl<Value*> &Factors,
+ const SmallVectorImpl<ValueEntry> &Ops,
+ bool IsRoot) {
+ BinaryOperator *BO;
+ if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
+ !(BO = dyn_cast<BinaryOperator>(V)) ||
+ BO->getOpcode() != Instruction::Mul) {
+ Factors.push_back(V);
+ return;
+ }
+
+ // If this value only appears to have a single use because it is another input
+ // to the add tree we're reassociating (and we dropped that use), it really has
+ // two uses and we can't factor it.
+ if (!IsRoot) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i].Op == V) {
+ Factors.push_back(V);
+ return;
+ }
+ }
+
+ // Otherwise, add the LHS and RHS to the list of factors.
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
+}
+
+/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
+/// instruction. This optimizes based on identities. If it can be reduced to
+/// a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+static Value *OptimizeAndOrXor(unsigned Opcode,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) // ...&X&~X = 0
+ return Constant::getNullValue(X->getType());
+
+ if (Opcode == Instruction::Or) // ...|X|~X = -1
+ return Constant::getAllOnesValue(X->getType());
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values for And and Or.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ ++NumAnnihil;
+ continue;
+ }
+
+ // Drop pairs of values for Xor.
+ assert(Opcode == Instruction::Xor);
+ if (e == 2)
+ return Constant::getNullValue(Ops[0].Op->getType());
+
+ // Y ^ X^X -> Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ ++NumAnnihil;
+ }
+ }
+ return 0;
+}
+
+/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// optimizes based on identities. If it can be reduced to a single Value, it
+/// is returned, otherwise the Ops list is mutated as necessary.
+Value *Reassociate::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
+ //
+ // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
+ //
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *TheOp = Ops[i].Op;
+ // Check to see if we've seen this operand before. If so, we factor all
+ // instances of the operand together. Due to our sorting criteria, we know
+ // that these need to be next to each other in the vector.
+ if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
+ // Rescan the list, remove all instances of this operand from the expr.
+ unsigned NumFound = 0;
+ do {
+ Ops.erase(Ops.begin()+i);
+ ++NumFound;
+ } while (i != Ops.size() && Ops[i].Op == TheOp);
+
+ DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
+ ++NumFactor;
+
+ // Insert a new multiply.
+ Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
+ Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+
+ // Now that we have inserted a multiply, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
+ RedoInsts.push_back(Mul);
+
+ // If every add operand was a duplicate, return the multiply.
+ if (Ops.empty())
+ return Mul;
+
+ // Otherwise, we had some input that didn't have the dupe, such as
+ // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
+
+ --i;
+ e = Ops.size();
+ continue;
+ }
+
+ // Check for X and -X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp))
+ continue;
+
+ Value *X = BinaryOperator::getNegArgument(TheOp);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX == i)
+ continue;
+
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2)
+ return Constant::getNullValue(X->getType());
+
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ DenseMap<Value*, unsigned> FactorOccurrences;
+
+ // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
+ // where they are actually the same multiply.
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ // Compute all of the factors of this added value.
+ SmallVector<Value*, 8> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ SmallPtrSet<Value*, 8> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ Value *Factor = Factors[i];
+ if (!Duplicates.insert(Factor)) continue;
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+
+ // If Factor is a negative constant, add the negated value as a factor
+ // because we can percolate the negate out. Watch out for INT_MIN, which
+ // cannot be made positive without overflowing.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (CI->isNegative() && !CI->isMinValue(true)) {
+ Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
+ ++NumFactor;
+
+ // Create a new instruction that uses the MaxOccVal twice. Without this,
+ // removing a factor from an expression could drop a use of MaxOccVal, which
+ // can cause RemoveFactorFromExpression on successive values to behave
+ // differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ SmallVector<Value*, 4> NewMulOps;
+ for (unsigned i = 0; i != Ops.size(); ++i) {
+ // Only try to remove factors from expressions we're allowed to.
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+
+ // Now that we have inserted the add tree, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ (void)NumAddedValues;
+ V = ReassociateExpression(cast<BinaryOperator>(V));
+
+ // Create the multiply.
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Rerun associate on the multiply in case the inner expression turned into
+ // a multiply. We want to make sure that we keep things in canonical form.
+ V2 = ReassociateExpression(cast<BinaryOperator>(V2));
+
+ // If every add operand included the factor (e.g. "A*B + A*C"), then the
+ // entire result expression is just the multiply "A*(B+C)".
+ if (Ops.empty())
+ return V2;
+
+ // Otherwise, we had some input that didn't have the factor, such as
+ // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+ }
+
+ return 0;
+}
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Now that we have the linearized expression tree, try to optimize it.
+ // Start by folding any constants that we found.
+ bool IterateOptimization = false;
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
+ if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
+ Ops.pop_back();
+ Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
+ return OptimizeExpression(I, Ops);
+ }
+
+ // Check for destructive annihilation due to a constant being used.
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ if (CstVal->isZero()) // X & 0 -> 0
+ return CstVal;
+ if (CstVal->isAllOnesValue()) // X & -1 -> X
+ Ops.pop_back();
+ break;
+ case Instruction::Mul:
+ if (CstVal->isZero()) { // X * 0 -> 0
+ ++NumAnnihil;
+ return CstVal;
+ }
+
+ if (cast<ConstantInt>(CstVal)->isOne())
+ Ops.pop_back(); // X * 1 -> X
+ break;
+ case Instruction::Or:
+ if (CstVal->isAllOnesValue()) // X | -1 -> -1
+ return CstVal;
+ // FALLTHROUGH!
+ case Instruction::Add:
+ case Instruction::Xor:
+ if (CstVal->isZero()) // X [|^+] 0 -> X
+ Ops.pop_back();
+ break;
+ }
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ // Handle destructive annihilation due to identities between elements in the
+ // argument list here.
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ break;
+ }
+
+ case Instruction::Add: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAdd(I, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ break;
+ }
+ //case Instruction::Mul:
+ }
+
+ if (IterateOptimization)
+ return OptimizeExpression(I, Ops);
+ return 0;
+}
+
+
+/// ReassociateInst - Inspect and reassociate the instruction at the
+/// given position, post-incrementing the position.
+void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
+
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
+ return; // Floating point and vector ops are not reassociated.
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (BI->getType()->isIntegerTy(1))
+ return;
+
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this instruction is an associative binary operator, process it.
+ if (!BI->isAssociative()) return;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
+
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ return;
+
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ return;
+
+ ReassociateExpression(I);
+}
+
+Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+ // First, walk the expression tree, linearizing the tree, collecting the
+ // operand information.
+ SmallVector<ValueEntry, 8> Ops;
+ LinearizeExprTree(I, Ops);
+
+ DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ // Now that we have linearized the tree to a list and have gathered all of
+ // the operands and their ranks, sort the operands by their rank. Use a
+ // stable_sort so that values with equal ranks will have their relative
+ // positions maintained (and so the compiler is deterministic). Note that
+ // this sorts so that the highest ranking values end up at the beginning of
+ // the vector.
+ std::stable_sort(Ops.begin(), Ops.end());
+
+ // OptimizeExpression - Now that we have the expression tree in a convenient
+ // sorted form, optimize it globally if possible.
+ if (Value *V = OptimizeExpression(I, Ops)) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
+ I->replaceAllUsesWith(V);
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ VI->setDebugLoc(I->getDebugLoc());
+ RemoveDeadBinaryOp(I);
+ ++NumAnnihil;
+ return V;
+ }
+
+ // We want to sink immediates as deeply as possible except in the case where
+ // this is a multiply tree used only by an add, and the immediate is a -1.
+ // In this case we reassociate to put the negation on the outside so that we
+ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+ if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ }
+
+ DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ if (Ops.size() == 1) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ I->replaceAllUsesWith(Ops[0].Op);
+ if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+ OI->setDebugLoc(I->getDebugLoc());
+ RemoveDeadBinaryOp(I);
+ return Ops[0].Op;
+ }
+
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+ return I;
+}
+
+
+bool Reassociate::runOnFunction(Function &F) {
+ // Recalculate the rank map for F
+ BuildRankMap(F);
+
+ MadeChange = false;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
+ ReassociateInst(BBI);
+
+ // Now that we're done, revisit any instructions which are likely to
+ // have secondary reassociation opportunities.
+ while (!RedoInsts.empty())
+ if (Value *V = RedoInsts.pop_back_val()) {
+ BasicBlock::iterator BBI = cast<Instruction>(V);
+ ReassociateInst(BBI);
+ }
+
+ // Now that we're done, delete any instructions which are no longer used.
+ while (!DeadInsts.empty())
+ if (Value *V = DeadInsts.pop_back_val())
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+
+ // We are done with the rank map.
+ RankMap.clear();
+ ValueRankMap.clear();
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 000000000000..47afc770bb0c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,134 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references. It is intended to be
+// the inverse of PromoteMemoryToRegister. By converting to loads, the only
+// values live across basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
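+// For example (hypothetical IR), a value such as "%v = add i32 %a, %b" that is
+// used in another basic block gets a dedicated alloca: %v is stored to it right
+// after the definition and reloaded immediately before each use, which is
+// roughly what DemoteRegToStack below arranges.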
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+ struct RegToMem : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ UI != E; ++UI) {
+ const Instruction *I = cast<Instruction>(*UI);
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char RegToMem::ID = 0;
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+
+bool RegToMem::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ // Insert all new allocas into entry block.
+ BasicBlock *BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+ // Find the first non-alloca instruction and create an insertion point. This
+ // is safe if the block is well-formed: it always has a terminator, otherwise
+ // we'll get an assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()),
+ "reg2mem alloca point", I);
+
+ // Find the escaped instructions. But don't create stack slots for
+ // allocas in the entry block.
+ std::list<Instruction*> WorkList;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ WorkList.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ WorkList.clear();
+
+ // Find all phi's
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ WorkList.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+}
+
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+//
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+ return new RegToMem();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 000000000000..083412ed942d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,2010 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+// * Assumes values are constant unless proven otherwise
+// * Assumes BasicBlocks are dead unless proven otherwise
+// * Proves values to be constant, and replaces them with constants
+// * Proves conditional branches to be unconditional
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy. It is a simple class with value semantics.
+///
+class LatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// forcedconstant - This LLVM Value was thought to be undef until
+ /// ResolvedUndefsIn. This is treated just like 'constant', but if merged
+ /// with another (different) constant, it goes to overdefined, instead of
+ /// asserting.
+ forcedconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ };
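+ // Illustrative transitions (hypothetical): a value starts out 'undefined',
+ // moves to 'constant' once a single constant is proven (or to
+ // 'forcedconstant' when ResolvedUndefsIn picks a value for an undef), and
+ // collapses to 'overdefined' as soon as two different constants would merge.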
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'forcedconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+ LatticeValueTy getLatticeValue() const {
+ return Val.getInt();
+ }
+
+public:
+ LatticeVal() : Val(0, undefined) {}
+
+ bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isConstant() const {
+ return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
+ }
+ bool isOverdefined() const { return getLatticeValue() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (getLatticeValue() == constant) { // Constant but not forcedconstant.
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ if (isUndefined()) {
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ } else {
+ assert(getLatticeValue() == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == getConstant()) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ Val.setInt(overdefined);
+ }
+ return true;
+ }
+
+ /// getConstantInt - If this is a constant with a ConstantInt value, return it
+ /// otherwise return null.
+ ConstantInt *getConstantInt() const {
+ if (isConstant())
+ return dyn_cast<ConstantInt>(getConstant());
+ return 0;
+ }
+
+ void markForcedConstant(Constant *V) {
+ assert(isUndefined() && "Can't force a defined value!");
+ Val.setInt(forcedconstant);
+ Val.setPointer(V);
+ }
+};
+} // end anonymous namespace.
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+ const TargetData *TD;
+ SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ ///
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
+
+ /// TrackedGlobals - If we are tracking any values for the contents of a global
+ /// variable, we keep a mapping from the global variable to its currently known
+ /// value. If the value becomes overdefined, its entry is simply removed from
+ /// this map.
+ DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+
+ /// TrackingIncomingArguments - This is the set of functions whose arguments we
+ /// make optimistic assumptions about and try to prove to be constants.
+ SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value*, 64> OverdefinedInstWorkList;
+ SmallVector<Value*, 64> InstWorkList;
+
+
+ SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
+
+ /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
+ /// overdefined, despite the fact that the PHI node is overdefined.
+ std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+ DenseSet<Edge> KnownFeasibleEdges;
+public:
+ SCCPSolver(const TargetData *td) : TD(td) {}
+
+ /// MarkBlockExecutable - This method can be used by clients to mark all of
+ /// the blocks that are known to be intrinsically live in the processed unit.
+ ///
+ /// This returns true if the block was not considered live before.
+ bool MarkBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB)) return false;
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+ }
+
+ /// TrackValueOfGlobalVariable - Clients can use this method to
+ /// inform the SCCPSolver that it should track loads and stores to the
+ /// specified global variable if it can. This is only legal to call if
+ /// performing Interprocedural SCCP.
+ void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getType()->getElementType()->isSingleValueType()) {
+ LatticeVal &IV = TrackedGlobals[GV];
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+ /// and out of the specified function (which cannot have its address taken),
+ /// this method must be called.
+ void AddTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+ }
+
+ void AddArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ /// Solve - Solve for constants and executable blocks.
+ ///
+ void Solve();
+
+ /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+ /// that branches on undef values cannot reach any of their successors.
+ /// However, this is not a safe assumption. After we solve dataflow, this
+ /// method should be used to handle this. If this returns true, the solver
+ /// should be rerun.
+ bool ResolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ LatticeVal getLatticeValueFor(Value *V) const {
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+ assert(I != ValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }
+
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }*/
+
+ /// getTrackedRetVals - Get the inferred return value map.
+ ///
+ const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ /// getTrackedGlobals - Get and return the set of inferred initializers for
+ /// global variables.
+ const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ void markOverdefined(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markOverdefined(ValueState[V], V);
+ }
+
+ /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// works with both scalars and structs.
+ void markAnythingOverdefined(Value *V) {
+ if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(V);
+ }
+
+private:
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ //
+ void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (!IV.markConstant(C)) return;
+ DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+ void markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markConstant(ValueState[V], V, C);
+ }
+
+ void markForcedConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ LatticeVal &IV = ValueState[V];
+ IV.markForcedConstant(C);
+ DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ void markOverdefined(LatticeVal &IV, Value *V) {
+ if (!IV.markOverdefined()) return;
+
+ DEBUG(dbgs() << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ dbgs() << "Function '" << F->getName() << "'\n";
+ else
+ dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
+ }
+
+ void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
+ if (IV.isOverdefined() || MergeWithV.isUndefined())
+ return; // Noop.
+ if (MergeWithV.isOverdefined())
+ markOverdefined(IV, V);
+ else if (IV.isUndefined())
+ markConstant(IV, V, MergeWithV.getConstant());
+ else if (IV.getConstant() != MergeWithV.getConstant())
+ markOverdefined(IV, V);
+ }
+
+ void mergeInValue(Value *V, LatticeVal MergeWithV) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ mergeInValue(ValueState[V], V, MergeWithV);
+ }
+
+
+ /// getValueState - Return the LatticeVal object that corresponds to the
+ /// value. This function handles the case when the value hasn't been seen yet
+ /// by properly seeding constants etc.
+ LatticeVal &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
+ ValueState.insert(std::make_pair(V, LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Undef values remain undefined.
+ if (!isa<UndefValue>(V))
+ LV.markConstant(C); // Constants are constant
+ }
+
+ // All others are undefined by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the LatticeVal object that corresponds to the
+ /// value/field pair. This function handles the case when the value hasn't
+ /// been seen yet by properly seeding constants etc.
+ LatticeVal &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator,
+ bool> I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<UndefValue>(C))
+ ; // Undef values remain undefined.
+ else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ LV.markConstant(CS->getOperand(i)); // Constants are constant.
+ else if (isa<ConstantAggregateZero>(C)) {
+ const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ LV.markConstant(Constant::getNullValue(FieldTy));
+ } else
+ LV.markOverdefined(); // Unknown sort of constant.
+ }
+
+ // All others are undefined by default.
+ return LV;
+ }
+
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
+ void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ if (!MarkBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ visitPHINode(*PN);
+ }
+ }
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ //
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+ // isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+ // block to the 'To' basic block is currently feasible.
+ //
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+ // OperandChangedState - This method is invoked on all of the users of an
+ // instruction that was just changed state somehow. Based on this
+ // information, we need to update the specified user of this instruction.
+ //
+ void OperandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ /// RemoveFromOverdefinedPHIs - If I has any entries in the
+ /// UsersOfOverdefinedPHIs map for PN, remove them now.
+ void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ if (UsersOfOverdefinedPHIs.empty()) return;
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ while (It != E) {
+ if (It->second == I)
+ UsersOfOverdefinedPHIs.erase(It++);
+ else
+ ++It;
+ }
+ }
+
+ /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
+ /// map for I and PN, but if one is there already, do not create another.
+ /// (Duplicate entries do not break anything directly, but can lead to
+ /// exponential growth of the table in rare cases.)
+ void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ std::multimap<PHINode*, Instruction*>::iterator J, E;
+ tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (; J != E; ++J)
+ if (J->second == I)
+ return;
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+ }
+
+private:
+ friend class InstVisitor<SCCPSolver>;
+
+ // visit implementations - Something changed in this instruction. Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminatorInst(TerminatorInst &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ // Instructions that cannot be folded away.
+ void visitStoreInst (StoreInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitCallInst (CallInst &I) {
+ visitCallSite(&I);
+ }
+ void visitInvokeInst (InvokeInst &II) {
+ visitCallSite(&II);
+ visitTerminatorInst(II);
+ }
+ void visitCallSite (CallSite CS);
+ void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+
+ void visitInstruction(Instruction &I) {
+ // If a new instruction is added to LLVM that we don't handle.
+ dbgs() << "SCCP: Don't know how to handle: " << I;
+ markAnythingOverdefined(&I); // Just in case
+ }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
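+// For example (hypothetical): for "br i1 %c, label %T, label %F", if %c is
+// known to be the constant false only Succs[1] (the %F edge) is set, if %c is
+// overdefined both edges are marked feasible, and if %c is still undefined
+// neither edge is.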
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVector<bool, 16> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUndefined())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions' successors are always executable.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0) { // Overdefined or undefined condition?
+ // All destinations are executable!
+ if (!SCValue.isUndefined())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI->findCaseValue(CI)] = true;
+ return;
+ }
+
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI)) {
+ // Just mark all destinations executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << TI << '\n';
+#endif
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+//
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+ assert(BBExecutable.count(To) && "Dest should always be alive!");
+
+ // Make sure the source basic block is executable!!
+ if (!BBExecutable.count(From)) return false;
+
+ // Check to make sure this edge itself is actually feasible now.
+ TerminatorInst *TI = From->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return true;
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+
+ // Overdefined condition variables mean the branch could go either way,
+ // undef conditions mean that neither edge is feasible yet.
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0)
+ return !BCValue.isUndefined();
+
+ // Constant condition variables mean the branch can only go a single way.
+ return BI->getSuccessor(CI->isZero()) == To;
+ }
+
+ // The successors of invoke instructions are always executable.
+ if (isa<InvokeInst>(TI))
+ return true;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0)
+ return !SCValue.isUndefined();
+
+ // Make sure to skip the default destination (successor #0), which has no case value.
+ for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
+ if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
+ return SI->getSuccessor(i) == To;
+
+ // If the constant value is not equal to any of the branches, we must
+ // execute default branch.
+ return SI->getDefaultDest() == To;
+ }
+
+ // Just mark all destinations executable!
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(TI))
+ return true;
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+ llvm_unreachable(0);
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and the constants coming from
+// different branches conflict, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges in only constants, and they all agree on a value,
+// the PHI node becomes a constant equal to that value.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+//
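+// Illustrative example (not from the original source): given
+//   %x = phi i32 [ 7, %bb1 ], [ 7, %bb2 ]
+//   %y = phi i32 [ 7, %bb1 ], [ 9, %bb2 ]
+// rule 2 marks %x as the constant 7 because all feasible incoming values
+// agree, while rule 1 drives %y to overdefined because its incoming
+// constants conflict.
+//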
+void SCCPSolver::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return markAnythingOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined()) {
+ // There may be instructions using this PHI node that are not overdefined
+ // themselves. If so, make sure that they know that the PHI node operand
+ // changed.
+ std::multimap<PHINode*, Instruction*>::iterator I, E;
+ tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
+ if (I == E)
+ return;
+
+ SmallVector<Instruction*, 16> Users;
+ for (; I != E; ++I)
+ Users.push_back(I->second);
+ while (!Users.empty())
+ visit(Users.pop_back_val());
+ return; // Quick exit
+ }
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return markOverdefined(&PN);
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is overdefined.
+ // If there are no executable operands, the PHI remains undefined.
+ //
+ Constant *OperandVal = 0;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LatticeVal IV = getValueState(PN.getIncomingValue(i));
+ if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ if (IV.isOverdefined()) // PHI node becomes overdefined!
+ return markOverdefined(&PN);
+
+ if (OperandVal == 0) { // Grab the first value.
+ OperandVal = IV.getConstant();
+ continue;
+ }
+
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+ // Check to see if there are two different constants merging; if so, the PHI
+ // node is overdefined.
+ if (IV.getConstant() != OperandVal)
+ return markOverdefined(&PN);
+ }
+
+ // If we exited the loop, this means that the PHI node only has constant
+ // arguments that agree with each other (and OperandVal is the constant) or
+ // OperandVal is null because there are no defined incoming arguments. If
+ // this is the case, the PHI remains undefined.
+ //
+ if (OperandVal)
+ markConstant(&PN, OperandVal); // Acquire operand value
+}
+
+
+
+
+void SCCPSolver::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0) return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+
+ }
+}
+
+void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPSolver::visitCastInst(CastInst &I) {
+ LatticeVal OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
+ markOverdefined(&I);
+ else if (OpSt.isConstant()) // Propagate constant value
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ OpSt.getConstant(), I.getType()));
+}
+
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ // If this returns a struct, mark all elements overdefined; we don't track
+ // structs within structs.
+ if (EVI.getType()->isStructTy())
+ return markAnythingOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ if (STy == 0)
+ return markOverdefined(&IVI);
+
+ // If this has more than one index, we can't handle it; drive all results to
+ // overdefined.
+ if (IVI.getNumIndices() != 1)
+ return markAnythingOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ LatticeVal EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ LatticeVal InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPSolver::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal CondValue = getValueState(I.getCondition());
+ if (CondValue.isUndefined())
+ return;
+
+ if (ConstantInt *CondCB = CondValue.getConstantInt()) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ LatticeVal TVal = getValueState(I.getTrueValue());
+ LatticeVal FVal = getValueState(I.getFalseValue());
+
+ // select ?, C, C -> C.
+ if (TVal.isConstant() && FVal.isConstant() &&
+ TVal.getConstant() == FVal.getConstant())
+ return markConstant(&I, FVal.getConstant());
+
+ if (TVal.isUndefined()) // select ?, undef, X -> X.
+ return mergeInValue(&I, FVal);
+ if (FVal.isUndefined()) // select ?, X, undef -> X.
+ return mergeInValue(&I, TVal);
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I,
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If something is undef, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // Otherwise, one of our operands is overdefined. Try to produce something
+ // better than overdefined with some tricks.
+
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
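+ // Illustrative example (not from the original source):
+ //   %r = and i32 %overdefined, 0
+ // is known to be 0 regardless of what %overdefined turns out to be, and
+ //   %r = or i32 %overdefined, -1
+ // is known to be -1.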
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined())
+ NonOverdefVal = &V1State;
+ else if (!V2State.isOverdefined())
+ NonOverdefVal = &V2State;
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I,
+ Constant::getAllOnesValue(I.getType()));
+ return;
+ }
+
+ if (I.getOpcode() == Instruction::And) {
+ // X and 0 = 0
+ if (NonOverdefVal->getConstant()->isNullValue())
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ } else {
+ if (ConstantInt *CI = NonOverdefVal->getConstantInt())
+ if (CI->isAllOnesValue()) // X or -1 = -1
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ }
+ }
+ }
+
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
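+ // Illustrative example (not from the original source): with
+ //   %p = phi i32 [ 1, %a ], [ 2, %b ]
+ //   %q = phi i32 [ 9, %a ], [ 8, %b ]
+ // an 'add i32 %p, %q' evaluates to 10 along both predecessors, so it can be
+ // marked as the constant 10 even though %p and %q are each overdefined.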
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+// Handle compare instructions (icmp and fcmp).
+void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If the operands are still undefined, wait for them to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // If something is overdefined, use some tricks to avoid ending up
+ // overdefined if we can.
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
+ In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(&I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &IdxState = getValueState(I.getOperand(1));
+
+ if (ValState.isOverdefined() || IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &EltState = getValueState(I.getOperand(1));
+ LatticeVal &IdxState = getValueState(I.getOperand(2));
+
+ if (ValState.isOverdefined() || EltState.isOverdefined() ||
+ IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+ else if (ValState.isUndefined() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ // TODO: SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+ LatticeVal &MaskState = getValueState(I.getOperand(2));
+
+ if (MaskState.isUndefined() ||
+ (V1State.isUndefined() && V2State.isUndefined()))
+ return; // Undefined output if mask or both inputs undefined.
+
+ if (V1State.isOverdefined() || V2State.isOverdefined() ||
+ MaskState.isOverdefined()) {
+ markOverdefined(&I);
+ } else {
+ // A mix of constant/undef inputs.
+ Constant *V1 = V1State.isConstant() ?
+ V1State.getConstant() : UndefValue::get(I.getType());
+ Constant *V2 = V2State.isConstant() ?
+ V2State.getConstant() : UndefValue::get(I.getType());
+ Constant *Mask = MaskState.isConstant() ?
+ MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
+ markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
+ }
+#endif
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+//
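+// Illustrative example (not from the original source): if the pointer
+// operand resolves to the global @G and the indices resolve to the constants
+// 0 and 1, then
+//   %p = getelementptr [4 x i32]* @G, i32 0, i32 1
+// is marked as the ConstantExpr getelementptr(@G, 0, 1).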
+void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (ValueState[&I].isOverdefined()) return;
+
+ SmallVector<Constant*, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ LatticeVal State = getValueState(I.getOperand(i));
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+
+ if (State.isOverdefined())
+ return markOverdefined(&I);
+
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ Constant *Ptr = Operands[0];
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
+ Operands.size()-1));
+}
+
+void SCCPSolver::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
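+// Illustrative example (not from the original source): given
+//   @G = internal constant i32 42
+//   %v = load i32* @G
+// the load is marked as the constant 42; similarly, a load of a null pointer
+// in address space 0 is marked as the null value of the loaded type.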
+void SCCPSolver::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct, just mark the result overdefined.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (!PtrVal.isConstant() || I.isVolatile())
+ return markOverdefined(IV, &I);
+
+ Constant *Ptr = PtrVal.getConstant();
+
+ // load null -> null
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
+ return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+
+ // Transform load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ return markConstant(IV, &I, C);
+
+ // Otherwise we cannot say for certain what value this load will produce.
+ // Bail out.
+ markOverdefined(IV, &I);
+}
+
+void SCCPSolver::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis, the callee is indirect, or the
+ // callee is external. Handle these cases first.
+ if (F == 0 || F->isDeclaration()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType()->isVoidTy()) return;
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
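+ // Illustrative example (not from the original source): a call such as
+ //   %r = call double @floor(double 2.5)
+ // has a foldable callee and all-constant operands, so the result can be
+ // marked as the constant 2.0 even though the callee is not tracked.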
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal State = getValueState(*AI);
+
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ if (State.isOverdefined())
+ return markOverdefined(I);
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ return markConstant(I, C);
+ }
+
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ return markAnythingOverdefined(I);
+ }
+
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge the actual arguments at the call site
+ // into the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(AI);
+ continue;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ }
+ } else {
+ mergeInValue(AI, getValueState(*CAI));
+ }
+ }
+ }
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(I, i), I,
+ TrackedMultipleRetVals[std::make_pair(F, i)]);
+ } else {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ }
+}
+
+void SCCPSolver::Solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the overdefined instruction work list first, which drives other
+ // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined
+ // Update all of the users of this instruction's value.
+ //
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be used to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking it one
+/// of the edges from the block as being feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
+/// CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible). This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other users
+/// of the value.
+///
+/// This scan also checks for values that use undefs, whose results are actually
+/// defined. For example, 'zext i8 undef to i32' should produce all zeros
+/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
+/// even if X isn't defined.
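+///
+/// Illustrative example (not from the original source):
+///   %z = or i32 undef, %x   is forced to the constant -1 (%x could be -1),
+///   %z = and i32 undef, %x  is forced to the constant 0  (%x could be 0),
+/// and a 'br i1 undef' has its condition forced to false so that one of its
+/// successor edges becomes executable.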
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for instructions which produce undef values.
+ if (I->getType()->isVoidTy()) continue;
+
+ if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef. Just send
+ // all their results to overdefined. We could be more precise than this
+ // but it isn't worth bothering.
+ if (isa<CallInst>(I) || isa<SelectInst>(I)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
+ }
+ }
+ continue;
+ }
+
+ LatticeVal &LV = getValueState(I);
+ if (!LV.isUndefined()) continue;
+
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(0)->getType()->isStructTy())
+ continue;
+
+ // Get the lattice values of the first two operands for use below.
+ LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op1LV;
+ if (I->getNumOperands() == 2) {
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(1)->getType()->isStructTy())
+ continue;
+
+ // If this is a two-operand instruction, and if both operands are
+ // undefs, the result stays undef.
+ Op1LV = getValueState(I->getOperand(1));
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ continue;
+ }
+
+ // If this is an instruction whose result is defined even if the input is
+ // not fully defined, propagate the information.
+ const Type *ITy = I->getType();
+ switch (I->getOpcode()) {
+ default: break; // Leave the instruction as an undef.
+ case Instruction::ZExt:
+ // After a zero extend, we know the top part is zero. SExt doesn't have
+ // to be handled here, because we don't know whether the top part is 1's
+ // or 0's.
+ case Instruction::SIToFP: // some FP values are not possible, just use 0.
+ case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Mul:
+ case Instruction::And:
+ // undef * X -> 0. X could be zero.
+ // undef & X -> 0. X could be zero.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::Or:
+ // undef | X -> -1. X could be -1.
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ return true;
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // X / undef -> undef. No change.
+ // X % undef -> undef. No change.
+ if (Op1LV.isUndefined()) break;
+
+ // undef / X -> 0. X could be maxint.
+ // undef % X -> 0. X could be 1.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::AShr:
+ // undef >>s X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >>s undef -> X. X could be 0, X could have the high-bit known set.
+ if (Op0LV.isConstant())
+ markForcedConstant(I, Op0LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // undef >> X -> undef. No change.
+ // undef << X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >> undef -> 0. X could be 0.
+ // X << undef -> 0. X could be 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Select:
+ // undef ? X : Y -> X or Y. There could be commonality between X/Y.
+ if (Op0LV.isUndefined()) {
+ if (!Op1LV.isConstant()) // Pick the constant one if there is any.
+ Op1LV = getValueState(I->getOperand(2));
+ } else if (Op1LV.isUndefined()) {
+ // c ? undef : undef -> undef. No change.
+ Op1LV = getValueState(I->getOperand(2));
+ if (Op1LV.isUndefined())
+ break;
+ // Otherwise, c ? undef : x -> x.
+ } else {
+ // Leave Op1LV as Operand(1)'s LatticeValue.
+ }
+
+ if (Op1LV.isConstant())
+ markForcedConstant(I, Op1LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::Call:
+ // If a call has an undef result, it is because it is constant foldable
+ // but one of the inputs was undef. Just force the result to
+ // overdefined.
+ markOverdefined(I);
+ return true;
+ }
+ }
+
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional()) continue;
+ if (!getValueState(BI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually a branch on undef, fix the undef to
+ // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to false.
+ markForcedConstant(BI->getCondition(),
+ ConstantInt::getFalse(TI->getContext()));
+ return true;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2) // no cases
+ continue;
+ if (!getValueState(SI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually a switch on undef, fix the undef to
+ // the first case constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->getCaseValue(1));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+ /// Sparse Conditional Constant Propagator.
+ ///
+ struct SCCP : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
+ //
+ bool runOnFunction(Function &F);
+ };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false)
+
+// createSCCPPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCPPass() {
+ return new SCCP();
+}
+
+static void DeleteInstructionInBlock(BasicBlock *BB) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+ ++NumDeadBlocks;
+
+ // Delete the instructions backwards; this reduces the number of def-use and
+ // use-def chains that have to be updated.
+ while (!isa<TerminatorInst>(BB->begin())) {
+ Instruction *I = --BasicBlock::iterator(BB->getTerminator());
+
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ ++NumInstRemoved;
+ }
+}
+
+// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// and return true if the function was modified.
+//
+bool SCCP::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // Mark the first block of the function as being executable.
+ Solver.MarkBlockExecutable(F.begin());
+
+ // Mark all arguments to the function as being overdefined.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
+ Solver.markAnythingOverdefined(AI);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+ DEBUG(dbgs() << "RESOLVING UNDEFs\n");
+ ResolvedUndefs = Solver.ResolvedUndefsIn(F);
+ }
+
+ bool MadeChanges = false;
+
+ // If we decided that there are basic blocks that are dead in this function,
+ // delete their contents now. Note that we cannot actually delete the blocks,
+ // as we cannot modify the CFG of the function.
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+ continue;
+ }
+
+ // Iterate over all of the instructions in the function, replacing them with
+ // constants if we have found them to have constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ continue;
+
+ // TODO: Reconstruct structs from their elements.
+ if (Inst->getType()->isStructTy())
+ continue;
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replace all of the uses of the instruction with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ }
+
+ return MadeChanges;
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// IPSCCP Class - This class implements interprocedural Sparse Conditional
+ /// Constant Propagation.
+ ///
+ struct IPSCCP : public ModulePass {
+ static char ID;
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IPSCCP::ID = 0;
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+
+// createIPSCCPPass - This is the public interface to this file.
+ModulePass *llvm::createIPSCCPPass() {
+ return new IPSCCP();
+}
+
+
+static bool AddressIsTaken(const GlobalValue *GV) {
+ // Delete any dead constantexpr klingons.
+ GV->removeDeadConstantUsers();
+
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == GV || SI->isVolatile())
+ return true; // Storing addr of GV.
+ } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ // Make sure we are calling the function, not passing the address.
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return true;
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return true;
+ } else if (isa<BlockAddress>(U)) {
+ // A blockaddress doesn't take the address of the function; it takes the
+ // address of a label.
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IPSCCP::runOnModule(Module &M) {
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
+ // Loop over all functions, marking arguments to those with their addresses
+ // taken or that are external as overdefined.
+ //
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration())
+ continue;
+
+ // If this is a strong or ODR definition of this function, then we can
+ // propagate information about its result into callsites of it.
+ if (!F->mayBeOverridden())
+ Solver.AddTrackedFunction(F);
+
+ // If this function only has direct calls that we can see, we can track its
+ // arguments and return value aggressively, and can assume it is not called
+ // unless we see evidence to the contrary.
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
+ }
+
+ // Assume the function is called.
+ Solver.MarkBlockExecutable(F->begin());
+
+ // Assume nothing about the incoming arguments.
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markAnythingOverdefined(AI);
+ }
+
+ // Loop over global variables. We inform the solver about any internal global
+ // variables that do not have their 'addresses taken'. If they don't have
+ // their addresses taken, we can propagate constants through them.
+ for (Module::global_iterator G = M.global_begin(), E = M.global_end();
+ G != E; ++G)
+ if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
+ Solver.TrackValueOfGlobalVariable(G);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+
+ DEBUG(dbgs() << "RESOLVING UNDEFS\n");
+ ResolvedUndefs = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ }
+
+ bool MadeChanges = false;
+
+ // Iterate over all of the instructions in the module, replacing them with
+ // constants if we have found them to have constant values.
+ //
+ SmallVector<BasicBlock*, 512> BlocksToErase;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (Solver.isBlockExecutable(F->begin())) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(AI);
+ if (IV.isOverdefined()) continue;
+
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+
+ // Replace all of the uses of the argument with uses of the
+ // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
+ }
+ }
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ if (!Succ->empty() && isa<PHINode>(Succ->begin()))
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+
+ if (&*BB != &F->front())
+ BlocksToErase.push_back(BB);
+ else
+ new UnreachableInst(M.getContext(), BB);
+ continue;
+ }
+
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
+ continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replace all of the uses of the instruction with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+ }
+
+ // Now that all instructions in the function are constant folded, erase dead
+ // blocks, because we can now use ConstantFoldTerminator to get rid of
+ // in-edges.
+ for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // If there are any PHI nodes in this successor, drop entries for BB now.
+ BasicBlock *DeadBB = BlocksToErase[i];
+ for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
+ UI != UE; ) {
+ // Grab the user and then increment the iterator early, as the user
+ // will be deleted. Step past all adjacent uses from the same user.
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ do { ++UI; } while (UI != UE && *UI == I);
+
+ // Ignore blockaddress users; BasicBlock's dtor will handle them.
+ if (!I) continue;
+
+ bool Folded = ConstantFoldTerminator(I->getParent());
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
+ }
+
+ // Finally, delete the basic block.
+ F->getBasicBlockList().erase(DeadBB);
+ }
+ BlocksToErase.clear();
+ }
+
+ // If we inferred constant or undef return values for a function, we replaced
+ // all call uses with the inferred value. This means we don't need to bother
+ // actually returning anything from the function. Replace all return
+ // instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
+ // TODO: Process multiple value ret instructions also.
+ const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+ E = RV.end(); I != E; ++I) {
+ Function *F = I->first;
+ if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ continue;
+
+ // We can only do this if we know that nothing else can call the function.
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
+ continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ ReturnsToZap.push_back(RI);
+ }
+
+ // Zap all of the returns that we've identified as safe to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
+ // If we inferred constant or undef values for global variables, we can
+ // delete the global and any stores that remain to it.
+ const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+ for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+ E = TG.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ assert(!I->second.isOverdefined() &&
+ "Overdefined values should have been taken out of the map!");
+ DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+ while (!GV->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ SI->eraseFromParent();
+ }
+ M.getGlobalList().erase(GV);
+ ++IPNumGlobalConst;
+ }
+
+ return MadeChanges;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 000000000000..302c287d3cbd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,189 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeLowerExpectIntrinsicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailDupPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
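+
+// Illustrative usage sketch (not part of the original source): a C client
+// might combine these bindings roughly as follows (error handling omitted):
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddSCCPPass(PM);
+//   LLVMAddCFGSimplificationPass(PM);
+//   LLVMRunPassManager(PM, Mod);   /* Mod is an existing LLVMModuleRef */
+//   LLVMDisposePassManager(PM);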
+
+void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAggressiveDCEPass());
+}
+
+void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCFGSimplificationPass());
+}
+
+void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadStoreEliminationPass());
+}
+
+void LLVMAddGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNPass());
+}
+
+void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIndVarSimplifyPass());
+}
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstructionCombiningPass());
+}
+
+void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createJumpThreadingPass());
+}
+
+void LLVMAddLICMPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLICMPass());
+}
+
+void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopDeletionPass());
+}
+
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIdiomPass());
+}
+
+void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRotatePass());
+}
+
+void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnrollPass());
+}
+
+void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnswitchPass());
+}
+
+void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMemCpyOptPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createReassociatePass());
+}
+
+void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSCCPPass());
+}
+
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass());
+}
+
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+ int Threshold) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+}
+
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSimplifyLibCallsPass());
+}
+
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTailCallEliminationPass());
+}
+
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantPropagationPass());
+}
+
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDemoteRegisterToMemoryPass());
+}
+
+void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createVerifierPass());
+}
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
new file mode 100644
index 000000000000..7d6349cf4e77
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -0,0 +1,2674 @@
+//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation implements the well known scalar replacement of
+// aggregates transformation. This xform breaks up alloca instructions of
+// aggregate type (structure or array) into individual alloca instructions for
+// each member (if possible). Then, if possible, it transforms the individual
+// alloca instructions into nice clean scalar SSA form.
+//
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// the two often interact, especially for C++ programs. As such, iterating
+// between SRoA and Mem2Reg until we run out of things to promote works well.
+//
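+// Illustrative example (not from the original source): an alloca such as
+//   %pair = alloca { i32, i32 }
+// whose fields are only accessed through constant-index GEPs is split into
+//   %pair.0 = alloca i32
+//   %pair.1 = alloca i32
+// and each piece is then promoted to an SSA value by the Mem2Reg machinery.
+//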
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarrepl"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumReplaced, "Number of allocas broken up");
+STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
+STATISTIC(NumConverted, "Number of aggregates converted to scalar");
+STATISTIC(NumGlobals, "Number of allocas copied from constant global");
+
+namespace {
+ struct SROA : public FunctionPass {
+ SROA(int T, bool hasDT, char &ID)
+ : FunctionPass(ID), HasDomTree(hasDT) {
+ if (T == -1)
+ SRThreshold = 128;
+ else
+ SRThreshold = T;
+ }
+
+ bool runOnFunction(Function &F);
+
+ bool performScalarRepl(Function &F);
+ bool performPromotion(Function &F);
+
+ private:
+ bool HasDomTree;
+ TargetData *TD;
+
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
+ /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
+ /// information about the uses. All these fields are initialized to false
+ /// and set to true when something is learned.
+ struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
+ /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
+ bool isUnsafe : 1;
+
+ /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
+ bool isMemCpySrc : 1;
+
+ /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
+ bool isMemCpyDst : 1;
+
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or loads/stores that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
+ };
+
+ unsigned SRThreshold;
+
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
+
+ bool isSafeAllocaToScalarRepl(AllocaInst *AI);
+
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
+ bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy);
+
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
+ void DeleteDeadInstructions();
+
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+ AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
+ };
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1) : SROA(T, true, ID) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass requires DominatorTree, and we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
+}
+
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold);
+ return new SROA_SSAUp(Threshold);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Convert To Scalar Optimization.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
+/// optimization, which scans the uses of an alloca and determines if it can
+/// rewrite it in terms of a single new alloca that can be mem2reg'd.
+class ConvertToScalarInfo {
+ /// AllocaSize - The size of the alloca being considered in bytes.
+ unsigned AllocaSize;
+ const TargetData &TD;
+
+ /// IsNotTrivial - This is set to true if there is some access to the object
+ /// which means that mem2reg can't promote it.
+ bool IsNotTrivial;
+
+ /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+ /// computed based on the uses of the alloca rather than the LLVM type system.
+ enum {
+ Unknown,
+
+ // Accesses via GEPs that are consistent with element access of a vector
+ // type. This will not be converted into a vector unless there is a later
+ // access using an actual vector type.
+ ImplicitVector,
+
+ // Accesses via vector operations and GEPs that are consistent with the
+ // layout of a vector type.
+ Vector,
+
+ // An integer bag-of-bits with bitwise operations for insertion and
+ // extraction. Any combination of types can be converted into this kind
+ // of scalar.
+ Integer
+ } ScalarKind;
+
+ /// VectorTy - This tracks the vector type that we should promote the alloca
+ /// to if it is possible to turn it into a vector. This starts out null, and
+ /// if it isn't possible to turn into a vector type, it gets set to VoidTy.
+ const VectorType *VectorTy;
+
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
+public:
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
+ VectorTy(0), HadNonMemTransferAccess(false) { }
+
+ AllocaInst *TryConvert(AllocaInst *AI);
+
+private:
+ bool CanConvertToScalar(Value *V, uint64_t Offset);
+ void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+
+ Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, IRBuilder<> &Builder);
+};
+} // end anonymous namespace.
+
+
+/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
+/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
+/// alloca if possible or null if not.
+AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
+ // If we can't convert this scalar, or if mem2reg can trivially do it, bail
+ // out.
+ if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ return 0;
+
+ // If an alloca has only memset / memcpy uses, it may still have an Unknown
+ // ScalarKind. Treat it as an Integer below.
+ if (ScalarKind == Unknown)
+ ScalarKind = Integer;
+
+ // FIXME: It should be possible to promote the vector type up to the alloca's
+ // size.
+ if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+ ScalarKind = Integer;
+
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ const Type *NewTy;
+ if (ScalarKind == Vector) {
+ assert(VectorTy && "Missing type for vector scalar.");
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
+ NewTy = VectorTy; // Use the vector type.
+ } else {
+ unsigned BitWidth = AllocaSize * 8;
+ if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+ !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ // Create and insert the integer alloca.
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
+ }
+ AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ return NewAI;
+}
+
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
+///
+/// There are three cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory. We mark this by setting VectorTy
+/// to VoidTy.
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+ uint64_t Offset) {
+ // If we already decided to turn this into a blob of integer memory, there is
+ // nothing to be done.
+ if (ScalarKind == Integer)
+ return;
+
+ // If this could be contributing to a vector, analyze it.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VectorTy.
+ if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (MergeInVectorType(VInTy, Offset))
+ return;
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (EltSize == AllocaSize)
+ return;
+
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ if (!VectorTy) {
+ ScalarKind = ImplicitVector;
+ VectorTy = VectorType::get(In, AllocaSize/EltSize);
+ return;
+ }
+
+ unsigned CurrentEltSize = VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8;
+ if (EltSize == CurrentEltSize)
+ return;
+
+ if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+ return;
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ ScalarKind = Integer;
+}
+
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+ uint64_t Offset) {
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this is the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ ScalarKind = Vector;
+ VectorTy = VInTy;
+ return true;
+ }
+
+ unsigned BitWidth = VectorTy->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+ ScalarKind = Vector;
+ return true;
+ }
+
+ const Type *ElementTy = VectorTy->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ ScalarKind = Vector;
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
+}
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to a single vector type, return true and set VectorTy to
+/// the new type. If we could only convert the alloca into a single promotable
+/// integer, return true as well; ScalarKind records which case applies.
+/// Further, if the use is not a completely trivial use that mem2reg could
+/// promote, set IsNotTrivial. Offset is the current offset from the base of
+/// the alloca being analyzed.
+///
+/// If we see at least one access to the value with an actual vector type, the
+/// kind is tracked as Vector rather than ImplicitVector.
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (LI->isVolatile())
+ return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(LI->getType(), Offset);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
+ HadNonMemTransferAccess = true;
+ MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!CanConvertToScalar(BCI, Offset))
+ return false;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with variable indices, we can't handle it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store of constant value.
+ if (!isa<ConstantInt>(MSI->getValue()))
+ return false;
+
+ // Store of constant size.
+ ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+ if (!Len)
+ return false;
+
+ // If the size differs from the alloca, we can only convert the alloca to
+ // an integer bag-of-bits.
+ // FIXME: This should handle all of the cases that are currently accepted
+ // as vector element insertions.
+ if (Len->getZExtValue() != AllocaSize || Offset != 0)
+ ScalarKind = Integer;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
+ if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ return false;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
+ uint64_t Offset) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (NumBytes != 0) {
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
+ Old, Offset, Builder);
+ Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination both refer to the same alloca, then this
+ // is a noop copy-to-self; just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ llvm_unreachable("Unsupported operation!");
+ }
+}
+
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+ unsigned NewBitWidth) {
+ bool IsFP1 = Ty1->isFloatingPointTy() ||
+ (Ty1->isVectorTy() &&
+ cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+ bool IsFP2 = Ty2->isFloatingPointTy() ||
+ (Ty2->isVectorTy() &&
+ cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+ LLVMContext &Context = Ty1->getContext();
+
+ // Prefer floating-point types over integer types, as integer types may have
+ // been created by earlier scalar replacement.
+ if (IsFP1 || IsFP2) {
+ if (NewBitWidth == 32)
+ return Type::getFloatTy(Context);
+ if (NewBitWidth == 64)
+ return Type::getDoubleTy(Context);
+ }
+
+ return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+ IRBuilder<> &Builder) {
+ const Type *FromType = FromVal->getType();
+ const VectorType *FromVTy = cast<VectorType>(FromType);
+ const VectorType *ToVTy = cast<VectorType>(ToType);
+ assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+ "Vectors must have the same element type");
+ Value *UnV = UndefValue::get(FromType);
+ unsigned numEltsFrom = FromVTy->getNumElements();
+ unsigned numEltsTo = ToVTy->getNumElements();
+
+ SmallVector<Constant*, 3> Args;
+ const Type* Int32Ty = Builder.getInt32Ty();
+ unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+ unsigned i;
+ for (i=0; i != minNumElts; ++i)
+ Args.push_back(ConstantInt::get(Int32Ty, i));
+
+ if (i < numEltsTo) {
+ Constant* UnC = UndefValue::get(Int32Ty);
+ for (; i != numEltsTo; ++i)
+ Args.push_back(UnC);
+ }
+ Constant *Mask = ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ const Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (FromTypeSize == ToTypeSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, they are two vectors with the same element type that
+ // have the same allocation size but a different number of elements,
+ // so use a shuffle vector.
+ if (FromType->getPrimitiveSizeInBits() ==
+ ToType->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ else
+ return CreateShuffleVectorCast(FromVal, ToType, Builder);
+ }
+
+ if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
+ assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+ "of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(FromType, ToType,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+ Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType, "tmp");
+ return V;
+ }
+
+ // If ToType is a first-class aggregate, extract out each of the pieces and
+ // use insertvalue instructions to form the FCA.
+ if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+ TD.getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shl), which are
+ // not well defined. We do this to support (e.g.) loads off the end of a
+ // structure where only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ ShAmt), "tmp");
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ -ShAmt), "tmp");
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal =
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal =
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (ToType->isIntegerTy()) {
+ // Should be done.
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // Convert the stored type to the actual type, shift it left to insert
+ // then 'or' into place.
+ const Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, they are two vectors with the same element type that
+ // have the same allocation size but a different number of elements,
+ // so use a shuffle vector.
+ if (VTy->getPrimitiveSizeInBits() ==
+ SV->getType()->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ else
+ return CreateShuffleVectorCast(SV, VTy, Builder);
+ }
+
+ if (isPowerOf2_64(VecSize / ValSize)) {
+ assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+ "value of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+ ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
+
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
+ // Must be an element insertion.
+ assert(SV->getType() == VTy->getElementType());
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned Elt = Offset/EltSize;
+ return Builder.CreateInsertElement(Old, SV,
+ ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
+ "tmp");
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ }
+ return Old;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
+ SV = Builder.CreateBitCast(SV,
+ IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ else if (SV->getType()->isPointerTy())
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shr), which are
+ // not well defined. We do this to support (e.g.) stores off the end of a
+ // structure where only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
+ ShAmt), "tmp");
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
+ -ShAmt), "tmp");
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SRoA Driver
+//===----------------------------------------------------------------------===//
+
+
+bool SROA::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on TargetData more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
+ while (1) {
+ bool LocalChange = performScalarRepl(F);
+ if (!LocalChange) break; // No need to repromote if no scalarrepl
+ Changed = true;
+ LocalChange = performPromotion(F);
+ if (!LocalChange) break; // No need to re-scalarrepl if no promotion
+ }
+
+ return Changed;
+}
+
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+ DIBuilder *DIB;
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ DIBuilder *DB)
+ : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Instruction *DbgVal = NULL;
+ // If an argument is zero or sign extended then use the argument directly.
+ // The ZExt/SExt may be zapped by an optimization pass in the future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ else
+ DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()), LI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+ }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the results, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operands
+/// to the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // Both operands to the select need to be dereferencable, either absolutely
+ // (e.g. allocas) or at this point because we can see other accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a PHI if its only uses are loads and if the incoming
+/// pointers can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+ // blocks. The only thing to watch out for is that we can't put a possibly
+ // trapping load in the predecessor if it is a critical edge.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the InVal is an invoke in the pred, we can't put a load on the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == Pred)
+ return false;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || SI->isVolatile())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ // If the condition being selected on is a constant, fold the select; yes,
+ // this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+ // This is very rare, and we just scrambled the use list of AI, so start
+ // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+ // If we have instructions that need to be rewritten for this to be
+ // promotable, take care of that now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which load we pick; if they differ, any choice is acceptable.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
+bool SROA::performPromotion(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ DIBuilder DIB(*F.getParent());
+ bool Changed = false;
+ SmallVector<Instruction*, 64> Insts;
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (tryToMakeAllocaBePromotable(AI, TD))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+ AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
+ Insts.clear();
+ }
+ }
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+ const Type *T = AI->getAllocatedType();
+ // Do not promote any struct into more than 32 separate vars.
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements() <= 32;
+ // Arrays are much less likely to be safe for SROA; only consider
+ // them if they are very small.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= 8;
+ return false;
+}
+
+
+// performScalarRepl - This algorithm is a simple worklist driven algorithm,
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
+//
+bool SROA::performScalarRepl(Function &F) {
+ std::vector<AllocaInst*> WorkList;
+
+ // Scan the entry basic block, adding allocas to the worklist.
+ BasicBlock &BB = F.getEntryBlock();
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
+ WorkList.push_back(A);
+
+ // Process the worklist
+ bool Changed = false;
+ while (!WorkList.empty()) {
+ AllocaInst *AI = WorkList.back();
+ WorkList.pop_back();
+
+ // Handle dead allocas trivially. These can be formed by SROA'ing arrays
+ // with unused elements.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If this alloca is impossible for us to promote, reject it early.
+ if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
+ continue;
+
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ Copy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can perform the core SROA transformation. We cannot
+ // transform the allocation instruction if it is an array allocation
+ // (allocations OF arrays are ok though), and an allocation of a scalar
+ // value cannot be decomposed at all.
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+
+ // Do not promote [0 x %struct].
+ if (AllocaSize == 0) continue;
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
+ // See if we can turn this aggregate value (potentially with casts) into a
+ // simple scalar value that can be mem2reg'd into a register value.
+ // IsNotTrivial tracks whether this is something that mem2reg could have
+ // promoted itself. If so, we don't want to transform it needlessly. Note
+ // that we can't just check based on the type: the alloca may be of an i32
+ // but that has pointer arithmetic to set byte 3 of it or something.
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
+ NewAI->takeName(AI);
+ AI->eraseFromParent();
+ ++NumConverted;
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, couldn't process this alloca.
+ }
+
+ return Changed;
+}
+
+/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
+/// predicate; perform SROA on it now.
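+///
+/// For instance, a hypothetical "%a = alloca [2 x i32], align 4" is replaced
+/// by "%a.0 = alloca i32, align 4" and "%a.1 = alloca i32, align 4", both of
+/// which are pushed back onto the worklist for further processing.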
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
+ SmallVector<AllocaInst*, 32> ElementAllocas;
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ ElementAllocas.reserve(ST->getNumContainedTypes());
+ for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ } else {
+ const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ElementAllocas.reserve(AT->getNumElements());
+ const Type *ElTy = AT->getElementType();
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ }
+
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
+ AI->eraseFromParent();
+
+ ++NumReplaced;
+}
+
+/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ }
+}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter. Offset indicates the position within AI that is
+/// referenced by this instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafeForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer
+/// derived from the alloca, we can often still split the alloca into elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single loads and stores
+/// that are PHI'd together, which can happen due to code sinking.
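+///
+/// An illustrative sketch of the pattern this allows (hypothetical names):
+///   bb1:  %p1 = getelementptr { i32, i32 }* %A, i32 0, i32 0
+///   bb2:  %p2 = getelementptr { i32, i32 }* %A, i32 0, i32 0
+///   join: %p  = phi i32* [ %p1, %bb1 ], [ %p2, %bb2 ]
+///         %v  = load i32* %p
+/// Every use reachable through the PHI stays within element 0 of %A, so the
+/// other element can still be split off and promoted.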
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
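+///
+/// For example (hypothetical IR, alloca type { i32, [4 x i32] }):
+///   getelementptr { i32, [4 x i32] }* %A, i32 0, i32 1, i32 2   ; safe
+///   getelementptr { i32, [4 x i32] }* %A, i32 0, i32 1, i32 %n  ; unsafe
+/// The second form is rejected because the array index is not a constant.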
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
+
+ // Walk through the GEP type indices, checking the types that this indexes
+ // into.
+ for (; GEPIt != E; ++GEPIt) {
+ // Ignore struct elements, no extra checking needed for these.
+ if ((*GEPIt)->isStructTy())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
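+/// For example, [3 x i32] and { i32, i32, i32 } are both homogeneous with
+/// NumElts == 3 and EltTy == i32, whereas { i32, float } is not.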
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+ const Type *&EltTy) {
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ const Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
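+///
+/// For example, a load of an i64 through a bitcast of a { i32, i32 } alloca
+/// (both 8 bytes on typical targets) is a whole-alloca integer access and is
+/// accepted here when AllowWholeAccess is true; it is handled later by the
+/// whole-alloca rewriting paths.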
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore,
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+ }
+ // Check if the offset/size correspond to a component within the alloca type.
+ const Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
+ return;
+ }
+
+ return MarkUnsafe(Info, TheAccess);
+}
+
+/// TypeHasComponent - Return true if T has a component type with the
+/// specified offset and size. If Size is zero, do not check the size.
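+///
+/// For example, for a hypothetical T = { i32, [4 x i8] } on a target where i32
+/// is 4 bytes, (Offset=4, Size=1) lands on the first i8 of the array and
+/// returns true, while (Offset=2, Size=4) straddles the boundary between the
+/// i32 and the array and returns false.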
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
+ const Type *EltTy;
+ uint64_t EltSize;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
+ }
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // Check if the component spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
+}
+
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ const Type *LIType = LI->getType();
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ IRBuilder<> Builder(LI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = Builder.CreateLoad(NewElts[i], "load");
+ Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (LIType->isIntegerTy() &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ const Type *SIType = Val->getType();
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ IRBuilder<> Builder(SI);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
+ Builder.CreateStore(Extract, NewElts[i]);
+ }
+ DeadInsts.push_back(SI);
+ } else if (SIType->isIntegerTy() &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ }
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
+ }
+ }
+}
+
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
+
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the first new element alloca.
+ Instruction *Val = NewElts[0];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
+
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within the specified type, which must be either a struct or an array.
+/// Sets T to the type of the element and Offset to the offset within that
+/// element. IdxTy is set to the type of the index result to be used in a
+/// GEP instruction.
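+///
+/// For example, for a hypothetical T = [4 x i64] (8-byte elements) and
+/// Offset = 20, this returns Idx = 2 and leaves T = i64, Offset = 4, and
+/// IdxTy = i64 (array indices are emitted as 64-bit integers here).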
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
+ }
+ const ArrayType *AT = cast<ArrayType>(T);
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
+
+/// RewriteGEP - Check if this GEP instruction moves the pointer across
+/// elements of the alloca that are being split apart, and if so, rewrite
+/// the GEP to be relative to the new element.
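+///
+/// For example, a hypothetical "getelementptr { i32, i32 }* %A, i32 0, i32 1"
+/// on an alloca split into %A.0 and %A.1 is replaced by a direct reference to
+/// %A.1; once the offset within the new element is zero, no GEP is needed.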
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ const Type *T = AI->getAllocatedType();
+ const Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Force the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+  // being split, then it does not need to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
+}
+
+/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
+/// Rewrite it to copy or set the elements of the scalarized memory.
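+///
+/// For example (an illustrative sketch, hypothetical names), a memcpy covering
+/// an entire { i32, i32 } alloca from another { i32, i32 }* %other becomes two
+/// per-element copies along the lines of:
+///   %other.0 = getelementptr inbounds { i32, i32 }* %other, i32 0, i32 0
+///   %tmp = load i32* %other.0
+///   store i32 %tmp, i32* %alloc.0
+/// (and likewise for element 1), after which the original memcpy is deleted.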
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // If this is a memcpy/memmove, construct the other pointer as the
+ // appropriate type. The "Other" pointer is the pointer that goes to memory
+ // that doesn't have anything to do with the alloca that we are promoting. For
+ // memset, this Value* stays null.
+ Value *OtherPtr = 0;
+ unsigned MemAlignment = MI->getAlignment();
+  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
+ if (Inst == MTI->getRawDest())
+ OtherPtr = MTI->getRawSource();
+ else {
+ assert(Inst == MTI->getRawSource());
+ OtherPtr = MTI->getRawDest();
+ }
+ }
+
+  // If there is another pointer, we want to convert it to the same pointer
+ // type as AI has, so we can GEP through it safely.
+ if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+    // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ OtherPtr = OtherPtr->stripPointerCasts();
+
+ // Copying the alloca to itself is a no-op: just delete it.
+ if (OtherPtr == AI || OtherPtr == NewElts[0]) {
+ // This code will run twice for a no-op memcpy -- once for each operand.
+ // Put only one reference to MI on the DeadInsts list.
+ for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ E = DeadInsts.end(); I != E; ++I)
+ if (*I == MI) return;
+ DeadInsts.push_back(MI);
+ return;
+ }
+
+ // If the pointer is not the right type, insert a bitcast to the right
+ // type.
+ const Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
+ }
+
+ // Process each element of the aggregate.
+ bool SROADest = MI->getRawDest() == Inst;
+
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // If this is a memcpy/memmove, emit a GEP of the other element address.
+ Value *OtherElt = 0;
+ unsigned OtherEltAlign = MemAlignment;
+
+ if (OtherPtr) {
+ Value *Idx[2] = { Zero,
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherPtr->getName()+"."+Twine(i),
+ MI);
+ uint64_t EltOffset;
+ const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ const Type *OtherTy = OtherPtrTy->getElementType();
+ if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ } else {
+ const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ }
+
+ // The alignment of the other pointer is the guaranteed alignment of the
+ // element, which is affected by both the known alignment of the whole
+      // mem intrinsic and the alignment of the element. If, for example, the
+      // alignment of the memcpy is 32 but the element is at a 4-byte offset,
+      // then the known alignment is just 4 bytes.
+ OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+ }
+
+ Value *EltPtr = NewElts[i];
+ const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+
+ // If we got down to a scalar, insert a load or store as appropriate.
+ if (EltTy->isSingleValueType()) {
+ if (isa<MemTransferInst>(MI)) {
+ if (SROADest) {
+ // From Other to Alloca.
+ Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+ new StoreInst(Elt, EltPtr, MI);
+ } else {
+ // From Alloca to Other.
+ Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+ new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+ }
+ continue;
+ }
+ assert(isa<MemSetInst>(MI));
+
+ // If the stored element is zero (common case), just store a null
+ // constant.
+ Constant *StoreVal;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
+ if (CI->isZero()) {
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ } else {
+ // If EltTy is a vector type, get the element type.
+ const Type *ValTy = EltTy->getScalarType();
+
+ // Construct an integer with the right value.
+ unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ APInt OneVal(EltSize, CI->getZExtValue());
+ APInt TotalVal(OneVal);
+ // Set each byte.
+ for (unsigned i = 0; 8*i < EltSize; ++i) {
+ TotalVal = TotalVal.shl(8);
+ TotalVal |= OneVal;
+ }
+
+ // Convert the integer value to the appropriate type.
+ StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
+ if (ValTy->isPointerTy())
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
+ else if (ValTy->isFloatingPointTy())
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
+ assert(StoreVal->getType() == ValTy && "Type mismatch!");
+
+ // If the requested value was a vector constant, create it.
+ if (EltTy != ValTy) {
+ unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
+ StoreVal = ConstantVector::get(Elts);
+ }
+ }
+ new StoreInst(StoreVal, EltPtr, MI);
+ continue;
+ }
+ // Otherwise, if we're storing a byte variable, use a memset call for
+ // this element.
+ }
+
+ unsigned EltSize = TD->getTypeAllocSize(EltTy);
+
+ IRBuilder<> Builder(MI);
+
+ // Finally, insert the meminst for this element.
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
+ } else {
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
+ }
+ }
+ DeadInsts.push_back(MI);
+}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer and store them individually.
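+///
+/// For example, a hypothetical store of an i64 covering a { i32, i32 } alloca
+/// on a little-endian target becomes roughly:
+///   %lo = trunc i64 %val to i32
+///   store i32 %lo, i32* %alloc.0
+///   %hi.shift = lshr i64 %val, 32
+///   %hi = trunc i64 %hi.shift to i32
+///   store i32 %hi, i32* %alloc.1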
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
+
+ // Handle tail padding by extending the operand
+ if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
+
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Get the number of bits to shift SrcVal to get the value.
+ const Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ const Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset;
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (ElementSizeBits == 0) continue;
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
+ }
+
+ // Truncate down to an integer of the right size.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ DeadInsts.push_back(SI);
+}
+
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
+/// an integer. Load the individual pieces to form the aggregate value.
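+///
+/// This is the inverse of the whole-alloca store case above. For a hypothetical
+/// i64 load covering a { i32, i32 } alloca on a little-endian target, the
+/// result is rebuilt roughly as:
+///   %e0 = load i32* %alloc.0
+///   %z0 = zext i32 %e0 to i64
+///   %e1 = load i32* %alloc.1
+///   %z1 = zext i32 %e1 to i64
+///   %s1 = shl i64 %z1, 32
+///   %r  = or i64 %s1, %z0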
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // Extract each element out of the NewElts according to its structure offset
+ // and form the result value.
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ const StructLayout *Layout = 0;
+ uint64_t ArrayEltBitOffset = 0;
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ Layout = TD->getStructLayout(EltSTy);
+ } else {
+ const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ }
+
+ Value *ResultVal =
+ Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Load the value from the alloca. If the NewElt is an aggregate, cast
+ // the pointer to an integer of the same size before doing the load.
+ Value *SrcField = NewElts[i];
+ const Type *FieldTy =
+ cast<PointerType>(SrcField->getType())->getElementType();
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ FieldSizeBits);
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
+ SrcField = new BitCastInst(SrcField,
+ PointerType::getUnqual(FieldIntTy),
+ "", LI);
+ SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
+
+ // If SrcField is a fp or vector of the right size but that isn't an
+ // integer type, bitcast to an integer so we can shift it.
+ if (SrcField->getType() != FieldIntTy)
+ SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
+
+ // Zero extend the field to be the same size as the final alloca so that
+ // we can shift and insert it.
+ if (SrcField->getType() != ResultVal->getType())
+ SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
+
+ // Determine the number of bits to shift SrcField.
+ uint64_t Shift;
+ if (Layout) // Struct case.
+ Shift = Layout->getElementOffsetInBits(i);
+ else // Array case.
+ Shift = i*ArrayEltBitOffset;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
+
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
+ SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
+ }
+
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
+ }
+
+ // Handle tail padding by truncating the result
+ if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
+
+ LI->replaceAllUsesWith(ResultVal);
+ DeadInsts.push_back(LI);
+}
+
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
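+///
+/// For example, on a target where i32 requires 4-byte alignment, { i8, i32 }
+/// has three bytes of padding between its fields and { i32, i8 } typically has
+/// tail padding, so both return true; { i32, i32 } has no padding and returns
+/// false.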
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
+
+ // SROA currently handles only Arrays and Structs.
+ const StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
+ PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ if (PrevFieldEnd < FieldBitOffset)
+ return true;
+ }
+ PrevFieldBitOffset = FieldBitOffset;
+ }
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
+}
+
+/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
+/// aggregate can be broken down into elements. Return true if it is safe to
+/// do so, false otherwise.
+bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
+ // Loop over the use list of the alloca. We can only transform it if all of
+ // the users are safe to transform.
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
+ return false;
+ }
+
+ // Okay, we know all the users are promotable. If the aggregate is a memcpy
+ // source and destination, we have to be careful. In particular, the memcpy
+ // could be moving around elements that live in structure padding of the LLVM
+ // types, but may actually be used. In these cases, we refuse to promote the
+ // struct.
+ if (Info.isMemCpySrc && Info.isMemCpyDst &&
+ HasPadding(AI->getAllocatedType(), *TD))
+ return false;
+
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+ // the alloca instead of promoting each piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool PointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return PointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer and return false if
+/// we see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with isOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
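+///
+/// The pattern being matched looks roughly like this (hypothetical names):
+///   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+///   ...
+///   %A = alloca [4 x i32]
+///   %dst = bitcast [4 x i32]* %A to i8*
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst,
+///          i8* bitcast ([4 x i32]* @G to i8*), i64 16, i32 4, i1 false)
+/// with every other use of %A being a read.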
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ bool isOffset,
+ SmallVector<Instruction *, 4> &LifetimeMarkers) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (LI->isVolatile()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+ LifetimeMarkers))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
+ isOffset || !GEP->hasAllZeroIndices(),
+ LifetimeMarkers))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() ||
+ CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ LifetimeMarkers.push_back(II);
+ continue;
+ }
+ }
+
+    // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (isOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!PointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVector<Instruction*, 4> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
+ return TheCopy;
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 000000000000..a66b3e38258f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,331 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+//   * Changes things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
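+///
+/// For example, a hypothetical
+///   invoke void @f() to label %normal unwind label %lpad
+/// becomes
+///   call void @f()
+///   br label %normal
+/// and %lpad loses this block as a predecessor.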
+static void ChangeToCall(InvokeInst *II) {
+ BasicBlock *BB = II->getParent();
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(BB);
+ BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ if (!Reachable.insert(BB))
+ continue;
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ ChangeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ ChangeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ ChangeToCall(II);
+ Changed = true;
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+static bool RemoveUnreachableBlocksFromFn(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return blocks, merge them together to promote recursive block merging.
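+///
+/// An illustrative sketch (hypothetical names): if %bb1 ends in "ret i32 %x"
+/// and %bb2 ends in "ret i32 %y", %bb2 is rewritten to "br label %bb1" and
+/// %bb1 gains a PHI ("%merge") selecting %x from its original predecessors and
+/// %y from %bb2, feeding a single "ret i32 %merge".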
+static bool MergeEmptyReturnBlocks(Function &F) {
+ bool Changed = false;
+
+ BasicBlock *RetBlock = 0;
+
+ // Scan all the blocks in the function, looking for empty return blocks.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+ BasicBlock &BB = *BBI++;
+
+ // Only look at return blocks.
+ ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (Ret == 0) continue;
+
+ // Only look at the block if it is empty or the only other thing in it is a
+ // single PHI node that is the operand to the return.
+ if (Ret != &BB.front()) {
+ // Check for something else in the block.
+ BasicBlock::iterator I = Ret;
+ --I;
+ // Skip over debug info.
+ while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+ --I;
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ (!isa<PHINode>(I) || I != BB.begin() ||
+ Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != I))
+ continue;
+ }
+
+ // If this is the first returning block, remember it and keep going.
+ if (RetBlock == 0) {
+ RetBlock = &BB;
+ continue;
+ }
+
+ // Otherwise, we found a duplicate return block. Merge the two.
+ Changed = true;
+
+    // The case when there is no input to the return, or when the returned
+    // values agree, is trivial. Note that they can't agree if there are phis
+    // in the blocks.
+ if (Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) ==
+ cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ BB.replaceAllUsesWith(RetBlock);
+ BB.eraseFromParent();
+ continue;
+ }
+
+ // If the canonical return block has no PHI node, create one now.
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+ if (RetBlockPHI == 0) {
+ Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
+ pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+ std::distance(PB, PE), "merge",
+ &RetBlock->front());
+
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ RetBlockPHI->addIncoming(InVal, *PI);
+ RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+ }
+
+ // Turn BB into a block that just unconditionally branches to the return
+ // block. This handles the case when the two return blocks have a common
+    // predecessor but return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+ BB.getTerminator()->eraseFromParent();
+ BranchInst::Create(RetBlock, &BB);
+ }
+
+ return Changed;
+}
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks and remove them if they are unneeded...
+ //
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++, TD)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// We may require multiple passes over the code to fully simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+ EverChanged |= MergeEmptyReturnBlocks(F);
+ EverChanged |= IterativeSimplifyCFG(F, TD);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
+ // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+  // avoid rerunning IterativeSimplifyCFG if the second pass of
+ // RemoveUnreachableBlocksFromFn doesn't do anything.
+ if (!RemoveUnreachableBlocksFromFn(F))
+ return true;
+
+ do {
+ EverChanged = IterativeSimplifyCFG(F, TD);
+ EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ } while (EverChanged);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..7c415e5150dc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,2391 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known functions (e.g. runtime
+// library functions). Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+  /// CallOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
+
+/// IsOnlyUsedInEqualityComparison - Return true if V is only used in equality
+/// comparisons with With.
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
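+
+// For illustration only: rough examples of the patterns these helpers match:
+//   if (strcmp(s, "key") == 0) ...        // result only compared ==/!= 0
+//   if (strstr(hay, needle) == hay) ...   // result only compared against hay
+// In both cases the exact return value is never needed, only the comparison.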
+
+//===----------------------------------------------------------------------===//
+// String and Memory LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'strcat' Optimizations
+namespace {
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitStrLenMemCpy(Src, Dst, Len, B);
+ return Dst;
+ }
+
+ void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD);
+
+    // Now that we have the destination's length, we must index into the
+    // destination's pointer to get the actual memcpy destination (the end of
+    // the string we're concatenating onto).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+    // We have enough information to now generate the memcpy call to do the
+    // concatenation for us.  Copy Len + 1 bytes so the nul terminator comes
+    // along, with align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ }
+};
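+
+// For illustration only: a rough sketch of the strcat rewrite above.  Given
+//   strcat(dst, "ab");
+// GetStringLength returns 3, Len is unbiased to 2, and (with TargetData) the
+// call becomes approximately
+//   end = dst + strlen(dst);
+//   memcpy(end, "ab", 3);   // Len + 1 bytes, so the nul is copied too
+// with the original dst returned as the strcat result.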
+
+//===---------------------------------------===//
+// 'strncat' Optimizations
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+    // We don't optimize the case where the bound is smaller than the source
+    // length.
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ EmitStrLenMemCpy(Src, Dst, SrcLen, B);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+      if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // strchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+    // strchr(s+n,c) -> gep(s+n, i), where i is the offset of c in the string.
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
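+
+// For illustration only: rough examples of the strchr rewrites above:
+//   strchr("hello", 'l') -> gep("hello", 2)    // constant fold
+//   strchr("hello", 'z') -> null               // char not found
+//   strchr(s, c)         -> memchr(s, c, Len)  // s of known length (incl. nul),
+//                                              // c not constant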
+
+//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD);
+ return 0;
+ }
+
+ // strrchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.rfind(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n, i), where i is the last offset of c in the
+    // string.
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strcmp(Str1.c_str(),Str2.c_str()));
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD);
+ }
+
+ return 0;
+ }
+};
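+
+// For illustration only: rough examples of the strcmp rewrites above:
+//   strcmp(x, x)       -> 0
+//   strcmp("", x)      -> *x  (zero extended)
+//   strcmp("ab", "ac") -> a negative constant (the host strcmp result)
+//   strcmp(p, "x")     -> memcmp(p, "x", 2)   when both lengths are known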
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strncmp(x, y, n) -> cnst  (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strncmp(Str1.c_str(), Str2.c_str(), Length));
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct StrCpyOpt : public LibCallOptimization {
+ bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
+
+ StrCpyOpt(bool c) : OptChkCall(c) {}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ unsigned NumParams = OptChkCall ? 3 : 2;
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != NumParams ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Copy Len bytes, which includes the nul terminator, with
+    // align = 1.
+ if (OptChkCall)
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD);
+ else
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
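+
+// For illustration only: a rough sketch of the strcpy rewrite above, assuming
+// TargetData is available:
+//   strcpy(dst, "hi")          -> memcpy(dst, "hi", 3)   // 3 includes the nul
+//   __strcpy_chk(dst, "hi", n) -> a checked memcpy via EmitMemCpyChk
+// and strcpy(x, x) simply folds to x.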
+
+//===---------------------------------------===//
+// 'strncpy' Optimizations
+
+struct StrNCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct StrLenOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (IsOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
+ }
+};
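+
+// For illustration only: rough examples of the strlen rewrites above:
+//   strlen("xyz")           -> 3
+//   if (strlen(s) == 0) ... -> if (*s == 0) ...   // only ==/!= 0 uses exist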
+
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
+
+struct StrToOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return 0;
+
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+      // With a null EndPtr, this function won't capture its first (string)
+      // argument.  It would be readonly too, except that it may still write
+      // to errno.
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strstr' Optimizations
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD);
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ Old->replaceAllUsesWith(Cmp);
+ Old->eraseFromParent();
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ std::string SearchStr, ToFindStr;
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1)
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
+ return 0;
+ }
+};
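+
+// For illustration only: rough examples of the strstr rewrites above:
+//   strstr(x, x)         -> x
+//   strstr(x, "")        -> x
+//   strstr("abcd", "bc") -> gep("abcd", 1)
+//   strstr(x, "y")       -> strchr(x, 'y')
+//   strstr(a, b) == a    -> strncmp(a, b, strlen(b)) == 0   (with TargetData)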
+
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct MemCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return 0;
+
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return 0;
+ }
+};
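+
+// For illustration only: rough examples of the memcmp rewrites above:
+//   memcmp(s, s, n)         -> 0
+//   memcmp(a, b, 0)         -> 0
+//   memcmp(a, b, 1)         -> *(unsigned char*)a - *(unsigned char*)b
+//   memcmp("foo", "for", 3) -> a constant (the host memcmp result)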
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct MemCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memmove' Optimizations
+
+struct MemMoveOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memset' Optimizations
+
+struct MemSetOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
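+
+// For illustration only: with TargetData available, the three rewrites above
+// replace the libc calls with the corresponding intrinsics, roughly:
+//   memcpy(d, s, n)  -> llvm.memcpy(d, s, n, align 1),     result d
+//   memmove(d, s, n) -> llvm.memmove(d, s, n, align 1),    result d
+//   memset(p, v, n)  -> llvm.memset(p, (i8)v, n, align 1), result p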
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct PowOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return 0;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
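+
+// For illustration only: a summary of the pow rewrites above:
+//   pow(1.0, x) -> 1.0          pow(x, 0.0)  -> 1.0
+//   pow(2.0, x) -> exp2(x)      pow(x, 1.0)  -> x
+//   pow(x, 2.0) -> x * x        pow(x, -1.0) -> 1.0 / x
+//   pow(x, 0.5) -> (x == -inf ? +inf : fabs(sqrt(x)))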
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct Exp2Opt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return 0;
+ }
+};
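+
+// For illustration only: a rough sketch of the exp2 rewrite above, shown for
+// 'double exp2(double)':
+//   exp2((double)(int)n)            -> ldexp(1.0, n)       // sext, <= 32 bits
+//   exp2((double)(unsigned short)n) -> ldexp(1.0, zext n)  // zext, < 32 bits
+// i.e. the power of two is formed by scaling 1.0, avoiding the libm call.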
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
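+
+// For illustration only: a rough example of the shrinking rewrite above:
+//   floor((double)f) -> (double)floorf(f)      // f has type float
+// and similarly for the other unary libm functions registered below (ceil,
+// round, rint, nearbyint) when the float variant exists on the host.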
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has one integer argument and an i32 result.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->getValue() == 0) // ffs(0) -> 0.
+ return Constant::getNullValue(CI->getType());
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall(F, Op, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+ }
+};
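+
+// For illustration only: rough examples of the ffs rewrites above:
+//   ffs(0)  -> 0
+//   ffs(12) -> 3                          // 12 = 0b1100, lowest set bit is
+//                                         // bit 3 (1-based)
+//   ffs(x)  -> x != 0 ? cttz(x) + 1 : 0   // via the llvm.cttz intrinsic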
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct AbsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
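+
+// For illustration only: the abs rewrite above in C terms:
+//   abs(x) -> (x > -1) ? x : -x
+// abs(INT_MIN) is undefined behavior in C, so the wrapping negation produced
+// here for that input is acceptable.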
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+    // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct PrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+    // Empty format string -> no-op.
+    if (FormatStr.empty()) // Tolerate printf declared with a void result.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+    // Do not do any of the following transformations if the printf return
+    // value is used; in general, the printf return value is not compatible
+    // with that of either putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr.erase(FormatStr.end()-1);
+ Constant *C = ConstantArray::get(*Context, FormatStr, true);
+ C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+ GlobalVariable::InternalLinkage, C, "str");
+ EmitPutS(C, B, TD);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ EmitPutS(CI->getArgOperand(1), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
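+
+// For illustration only: rough examples of the printf rewrites above (the
+// result of the call must be unused, except for the iprintf case):
+//   printf("x")       -> putchar('x')
+//   printf("hello\n") -> puts("hello")
+//   printf("%c", c)   -> putchar(c)
+//   printf("%s\n", s) -> puts(s)
+//   printf(fmt, ...)  -> iprintf(fmt, ...)  // TLI has iprintf, no FP args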
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct SPrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+    // If we just have a format string (nothing else crazy), transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *IncLen = B.CreateAdd(Len,
+ ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
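+
+// For illustration only: rough examples of the sprintf rewrites above:
+//   sprintf(s, "hello")   -> memcpy(s, "hello", 6), result 5   (needs TD)
+//   sprintf(s, "%c", c)   -> store c and a trailing nul, result 1
+//   sprintf(s, "%s", str) -> memcpy(s, str, strlen(str)+1),
+//                            result strlen(str)                (needs TD)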
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct FWriteOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct FPutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+    // Require two pointers.  Also, we can't optimize if the return value is
+    // used.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len) return 0;
+ EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD);
+ return CI; // Known to have no uses (see above).
+ }
+};
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct FPrintFOpt : public LibCallOptimization {
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // All the optimizations depend on the format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ return 0;
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer parameters and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ std::string Str;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// This pass optimizes well-known library functions from libc and libm.
+ ///
+ class SimplifyLibCalls : public FunctionPass {
+ TargetLibraryInfo *TLI;
+
+ StringMap<LibCallOptimization*> Optimizations;
+ // String and Memory LibCall Optimizations
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
+ MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
+ // Math Library Optimizations
+ PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ // Integer Optimizations
+ FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+ // Formatting and IO Optimizations
+ SPrintFOpt SPrintF; PrintFOpt PrintF;
+ FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ PutsOpt Puts;
+
+ bool Modified; // This is only used by doInitialization.
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ void setDoesNotAccessMemory(Function &F);
+ void setOnlyReadsMemory(Function &F);
+ void setDoesNotThrow(Function &F);
+ void setDoesNotCapture(Function &F, unsigned n);
+ void setDoesNotAlias(Function &F, unsigned n);
+ bool doInitialization(Module &M);
+
+ void inferPrototypeAttributes(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+} // end anonymous namespace.
+
+char SimplifyLibCalls::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
+
+/// Optimizations - Populate the Optimizations map with all the optimizations
+/// we know.
+void SimplifyLibCalls::InitOptimizations() {
+ // String and Memory LibCall Optimizations
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
+ Optimizations["strncpy"] = &StrNCpy;
+ Optimizations["strlen"] = &StrLen;
+ Optimizations["strpbrk"] = &StrPBrk;
+ Optimizations["strtol"] = &StrTo;
+ Optimizations["strtod"] = &StrTo;
+ Optimizations["strtof"] = &StrTo;
+ Optimizations["strtoul"] = &StrTo;
+ Optimizations["strtoll"] = &StrTo;
+ Optimizations["strtold"] = &StrTo;
+ Optimizations["strtoull"] = &StrTo;
+ Optimizations["strspn"] = &StrSpn;
+ Optimizations["strcspn"] = &StrCSpn;
+ Optimizations["strstr"] = &StrStr;
+ Optimizations["memcmp"] = &MemCmp;
+ if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
+ Optimizations["memmove"] = &MemMove;
+ if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
+
+ // _chk variants of String and Memory LibCall Optimizations.
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+
+ // Math Library Optimizations
+ Optimizations["powf"] = &Pow;
+ Optimizations["pow"] = &Pow;
+ Optimizations["powl"] = &Pow;
+ Optimizations["llvm.pow.f32"] = &Pow;
+ Optimizations["llvm.pow.f64"] = &Pow;
+ Optimizations["llvm.pow.f80"] = &Pow;
+ Optimizations["llvm.pow.f128"] = &Pow;
+ Optimizations["llvm.pow.ppcf128"] = &Pow;
+ Optimizations["exp2l"] = &Exp2;
+ Optimizations["exp2"] = &Exp2;
+ Optimizations["exp2f"] = &Exp2;
+ Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+ Optimizations["llvm.exp2.f128"] = &Exp2;
+ Optimizations["llvm.exp2.f80"] = &Exp2;
+ Optimizations["llvm.exp2.f64"] = &Exp2;
+ Optimizations["llvm.exp2.f32"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+ Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+ Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+ Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+ Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+ // Integer Optimizations
+ Optimizations["ffs"] = &FFS;
+ Optimizations["ffsl"] = &FFS;
+ Optimizations["ffsll"] = &FFS;
+ Optimizations["abs"] = &Abs;
+ Optimizations["labs"] = &Abs;
+ Optimizations["llabs"] = &Abs;
+ Optimizations["isdigit"] = &IsDigit;
+ Optimizations["isascii"] = &IsAscii;
+ Optimizations["toascii"] = &ToAscii;
+
+ // Formatting and IO Optimizations
+ Optimizations["sprintf"] = &SPrintF;
+ Optimizations["printf"] = &PrintF;
+ Optimizations["fwrite"] = &FWrite;
+ Optimizations["fputs"] = &FPuts;
+ Optimizations["fprintf"] = &FPrintF;
+ Optimizations["puts"] = &Puts;
+}
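+
+// For illustration only: lookup is purely by callee name, so wiring up a new
+// simplification means defining another LibCallOptimization subclass above,
+// giving the pass a member instance, and registering it here, e.g. the
+// hypothetical
+//   Optimizations["strndup"] = &StrNDup;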
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ IRBuilder<> Builder(F.getContext());
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (!LCO) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Use debug location of CI for all new instructions.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Try to optimize this call.
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+ dbgs() << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+ ++NumSimplified;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
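+
+// For illustration only: this pass is normally scheduled as part of the
+// standard optimization pipeline, but it can also be run directly, e.g.
+//   opt -simplify-libcalls input.bc -o output.bc
+// with createSimplifyLibCallsPass() as the programmatic entry point.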
+
+// Utility methods for doInitialization.
+
+void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
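+
+// For illustration only: these setters are how doInitialization() annotates
+// known prototypes.  For example, for
+//   size_t strlen(const char *s);
+// inferPrototypeAttributes() below marks the function readonly and nounwind
+// and its first parameter nocapture, bumping NumAnnotated once per attribute
+// actually added.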
+
+
+void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
+ const FunctionType *FTy = F.getFunctionType();
+
+ StringRef Name = F.getName();
+ switch (Name[0]) {
+ case 's':
+ if (Name == "strlen") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strchr" ||
+ Name == "strrchr") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "strcpy" ||
+ Name == "stpcpy" ||
+ Name == "strcat" ||
+ Name == "strtol" ||
+ Name == "strtod" ||
+ Name == "strtof" ||
+ Name == "strtoul" ||
+ Name == "strtoll" ||
+ Name == "strtold" ||
+ Name == "strncat" ||
+ Name == "strncpy" ||
+ Name == "strtoull") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strxfrm") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strcmp" ||
+ Name == "strspn" ||
+ Name == "strncmp" ||
+ Name == "strcspn" ||
+ Name == "strcoll" ||
+ Name == "strcasecmp" ||
+ Name == "strncasecmp") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strstr" ||
+ Name == "strpbrk") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strtok" ||
+ Name == "strtok_r") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "scanf" ||
+ Name == "setbuf" ||
+ Name == "setvbuf") {
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strdup" ||
+ Name == "strndup") {
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "stat" ||
+ Name == "sscanf" ||
+ Name == "sprintf" ||
+ Name == "statvfs") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "snprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "setitimer") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "system") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'm':
+ if (Name == "malloc") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "memcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memchr" ||
+ Name == "memrchr") {
+ if (FTy->getNumParams() != 3)
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "modf" ||
+ Name == "modff" ||
+ Name == "modfl" ||
+ Name == "memcpy" ||
+ Name == "memccpy" ||
+ Name == "memmove") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memalign") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotAlias(F, 0);
+ } else if (Name == "mkdir" ||
+ Name == "mktime") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'r':
+ if (Name == "realloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "read") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "rmdir" ||
+ Name == "rewind" ||
+ Name == "remove" ||
+ Name == "realpath") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "rename" ||
+ Name == "readlink") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'w':
+ if (Name == "write") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'b':
+ if (Name == "bcopy") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bzero") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'c':
+ if (Name == "calloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "chmod" ||
+ Name == "chown" ||
+ Name == "ctermid" ||
+ Name == "clearerr" ||
+ Name == "closedir") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'a':
+ if (Name == "atoi" ||
+ Name == "atol" ||
+ Name == "atof" ||
+ Name == "atoll") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "access") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'f':
+ if (Name == "fopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fdopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "feof" ||
+ Name == "free" ||
+ Name == "fseek" ||
+ Name == "ftell" ||
+ Name == "fgetc" ||
+ Name == "fseeko" ||
+ Name == "ftello" ||
+ Name == "fileno" ||
+ Name == "fflush" ||
+ Name == "fclose" ||
+ Name == "fsetpos" ||
+ Name == "flockfile" ||
+ Name == "funlockfile" ||
+ Name == "ftrylockfile") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "ferror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ } else if (Name == "fputc" ||
+ Name == "fstat" ||
+ Name == "frexp" ||
+ Name == "frexpf" ||
+ Name == "frexpl" ||
+ Name == "fstatvfs") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fgets") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "fread" ||
+ Name == "fwrite") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ } else if (Name == "fputs" ||
+ Name == "fscanf" ||
+ Name == "fprintf" ||
+ Name == "fgetpos") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'g':
+ if (Name == "getc" ||
+ Name == "getlogin_r" ||
+ Name == "getc_unlocked") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "getenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "gets" ||
+ Name == "getchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "getitimer") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "getpwnam") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'u':
+ if (Name == "ungetc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "uname" ||
+ Name == "unlink" ||
+ Name == "unsetenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "utime" ||
+ Name == "utimes") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'p':
+ if (Name == "putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "puts" ||
+ Name == "printf" ||
+ Name == "perror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "pread" ||
+ Name == "pwrite") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "putchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "popen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "pclose") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'v':
+ if (Name == "vscanf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vsscanf" ||
+ Name == "vfscanf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "valloc") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "vprintf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vfprintf" ||
+ Name == "vsprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "vsnprintf") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ }
+ break;
+ case 'o':
+ if (Name == "open") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ } else if (Name == "opendir") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 't':
+ if (Name == "tmpfile") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "times") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'h':
+ if (Name == "htonl" ||
+ Name == "htons") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'n':
+ if (Name == "ntohl" ||
+ Name == "ntohs") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'l':
+ if (Name == "lstat") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "lchown") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'q':
+ if (Name == "qsort") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return;
+ // May throw; qsort calls back through the user-supplied comparison function.
+ setDoesNotCapture(F, 4);
+ }
+ break;
+ case '_':
+ if (Name == "__strdup" ||
+ Name == "__strndup") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "__strtok_r") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "_IO_getc") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "_IO_putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 1:
+ if (Name == "\1__isoc99_scanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1stat64" ||
+ Name == "\1lstat64" ||
+ Name == "\1statvfs64" ||
+ Name == "\1__isoc99_sscanf") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fopen64") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fseeko64" ||
+ Name == "\1ftello64") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1tmpfile64") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "\1fstat64" ||
+ Name == "\1fstatvfs64") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1open64") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ }
+}
+
+/// doInitialization - Add attributes to well-known functions.
+///
+bool SimplifyLibCalls::doInitialization(Module &M) {
+ Modified = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (F.isDeclaration() && F.hasName())
+ inferPrototypeAttributes(F);
+ }
+ return Modified;
+}
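+
+// As a rough illustration (using fopen from the cases above purely as an
+// example): given a module that only declares
+//   FILE *fopen(const char *path, const char *mode);
+// this initialization marks the declaration as non-throwing, marks its return
+// value as not aliasing any other pointer, and marks both pointer arguments
+// as not captured, so later passes can reason about calls to it without ever
+// seeing a body.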
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(x)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+// * cos(-x) -> cos(x)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+// * stpcpy(str, "literal") ->
+// llvm.memcpy(str,"literal",strlen("literal")+1,1)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
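+// As a sketch of what one of these rewrites would look like, the pow entries
+// above would turn a call such as
+//   pow(pow(x, y), z)
+// into pow(x, y * z); like most of the identities listed here, this is
+// generally only valid under relaxed floating-point semantics, since rounding
+// and special-value behavior can differ between the two forms.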
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 000000000000..705f44204900
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,274 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
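+
+// As a small illustration (variable names are made up), given code like
+//   int x = a + b;
+//   if (cond)
+//     use(x);      // x is unused on the other path
+// the addition can be sunk into the block guarded by 'cond', so the work is
+// only performed on the path that actually needs the result.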
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+
+namespace {
+ class Sinking : public FunctionPass {
+ DominatorTree *DT;
+ LoopInfo *LI;
+ AliasAnalysis *AA;
+
+ public:
+ static char ID; // Pass identification
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ }
+ private:
+ bool ProcessBlock(BasicBlock &BB);
+ bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ };
+} // end anonymous namespace
+
+char Sinking::ID = 0;
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+ BasicBlock *BB) const {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the value. The DWARF generator handles this, although
+ // the user might not get the right debug info at runtime.
+ for (Value::use_iterator I = Inst->use_begin(),
+ E = Inst->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ Instruction *UseInst = cast<Instruction>(*I);
+ BasicBlock *UseBlock = UseInst->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ // Check that it dominates.
+ if (!DT->dominates(BB, UseBlock))
+ return false;
+ }
+ return true;
+}
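+
+// For illustration (value and block names are made up): if a use is the
+// incoming value of a PHI node such as
+//   %p = phi i32 [ %val, %A ], [ 0, %B ]
+// then the use of %val is treated as occurring at the end of %A, so it is %A,
+// not the PHI's own block, that must be dominated by BB.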
+
+bool Sinking::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (Function::iterator I = F.begin(), E = F.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an unreachable
+ // loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+ if (L->isVolatile()) return false;
+
+ AliasAnalysis::Location Loc = AA->getLocation(L);
+ for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+ E = Stores.end(); I != E; ++I)
+ if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
+ return false;
+ }
+
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ return false;
+
+ return true;
+}
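+
+// For example (names are made up), when the caller walks a block bottom-up:
+//   v = *p;    // candidate load
+//   *q = 0;    // store already recorded in Stores
+// the load may only be sunk past the store if alias analysis shows the store
+// cannot modify *p; any instruction that may write memory is itself added to
+// Stores and is never sunk.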
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ BasicBlock *ParentBlock = Inst->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ BasicBlock *SuccToSinkTo = 0;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Instructions can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ // Look at all the successors and decide which one
+ // we should sink to.
+ for (succ_iterator SI = succ_begin(ParentBlock),
+ E = succ_end(ParentBlock); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Inst, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(dbgs() << "Sink instr " << *Inst);
+ DEBUG(dbgs() << "to block ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false));
+
+ // If the block has multiple predecessors, sinking would introduce computation
+ // on a path where it does not already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!Inst->isSafeToSpeculativelyExecute()) {
+ DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
+ return false;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Don't sink instructions into a loop.
+ if (LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+ return false;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+ ++InsertPos;
+
+ // Move the instruction.
+ Inst->moveBefore(InsertPos);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
new file mode 100644
index 000000000000..9dd83c04fa61
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
@@ -0,0 +1,373 @@
+//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a limited form of tail duplication, intended to simplify
+// CFGs by removing some unconditional branches. This pass is necessary to
+// straighten out loops created by the C front-end, but also is capable of
+// making other code nicer. After this pass is run, the CFG simplify pass
+// should be run to clean up the mess.
+//
+// This pass could be enhanced in the future to use profile information to be
+// more aggressive.
+//
+//===----------------------------------------------------------------------===//
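+
+// As a rough sketch of the transformation (illustrative CFG): if bb1 and bb2
+// both end in an unconditional branch to a small block 'tail', that block can
+// be cloned into its predecessors, removing the unconditional branches and
+// letting each copy be simplified using the values known in its own
+// predecessor.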
+
+#define DEBUG_TYPE "tailduplicate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
+
+static cl::opt<unsigned>
+TailDupThreshold("taildup-threshold",
+ cl::desc("Max block size to tail duplicate"),
+ cl::init(1), cl::Hidden);
+
+namespace {
+ class TailDup : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TailDup() : FunctionPass(ID) {
+ initializeTailDupPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
+ inline void eliminateUnconditionalBranch(BranchInst *BI);
+ SmallPtrSet<BasicBlock*, 4> CycleDetector;
+ };
+}
+
+char TailDup::ID = 0;
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
+
+// Public interface to the Tail Duplication pass
+FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
+
+/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
+/// the function, eliminating it if it looks attractive enough. CycleDetector
+/// prevents infinite loops by checking that we aren't redirecting a branch to
+/// a place it already pointed to earlier; see PR 2323.
+bool TailDup::runOnFunction(Function &F) {
+ bool Changed = false;
+ CycleDetector.clear();
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator(),
+ TailDupThreshold)) {
+ eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
+ Changed = true;
+ } else {
+ ++I;
+ CycleDetector.clear();
+ }
+ }
+ return Changed;
+}
+
+/// shouldEliminateUnconditionalBranch - Return true if this branch looks
+/// attractive to eliminate. We eliminate the branch if the destination basic
+/// block has at most Threshold instructions in it, not counting PHI nodes or
+/// debug intrinsics. In practice, since one of these is a terminator
+/// instruction, this means that we will add at most Threshold-1 instructions
+/// to the new block.
+///
+/// We don't count PHI nodes in the count since they will be removed when the
+/// contents of the block are copied over.
+///
+bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
+ unsigned Threshold) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (Dest == BI->getParent()) return false; // Do not loop infinitely!
+
+ // Do not inline a block if we will just get another branch to the same block!
+ TerminatorInst *DTI = Dest->getTerminator();
+ if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
+ if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
+ return false; // Do not loop infinitely!
+
+ // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
+ // because doing so would require breaking critical edges. This should be
+ // fixed eventually.
+ if (!DTI->use_empty())
+ return false;
+
+ // Do not bother with blocks with only a single predecessor: SimplifyCFG
+ // will fold these two blocks together!
+ pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
+ ++PI;
+ if (PI == PE) return false; // Exactly one predecessor!
+
+ BasicBlock::iterator I = Dest->getFirstNonPHI();
+
+ for (unsigned Size = 0; I != Dest->end(); ++I) {
+ if (Size == Threshold) return false; // The block is too large.
+
+ // Don't tail duplicate call instructions. They are very large compared to
+ // other instructions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
+
+ // Also alloca and malloc.
+ if (isa<AllocaInst>(I)) return false;
+
+ // Some vector instructions can expand into a number of instructions.
+ if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<InsertElementInst>(I)) return false;
+
+ // Only count instructions that are not debugger intrinsics.
+ if (!isa<DbgInfoIntrinsic>(I)) ++Size;
+ }
+
+ // Do not tail duplicate a block that has thousands of successors into a block
+ // with a single successor if the block has many other predecessors. This can
+ // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
+ // cases that have a large number of indirect gotos.
+ unsigned NumSuccs = DTI->getNumSuccessors();
+ if (NumSuccs > 8) {
+ unsigned TooMany = 128;
+ if (NumSuccs >= TooMany) return false;
+ TooMany = TooMany/NumSuccs;
+ for (; PI != PE; ++PI)
+ if (TooMany-- == 0) return false;
+ }
+
+ // If this unconditional branch is a fall-through, be careful about
+ // tail duplicating it. In particular, we don't want to taildup it if the
+ // original block will still be there after taildup is completed: doing so
+ // would eliminate the fall-through, requiring unconditional branches.
+ Function::iterator DestI = Dest;
+ if (&*--DestI == BI->getParent()) {
+ // The uncond branch is a fall-through. Tail duplicating the block will
+ // eliminate the fall-through-ness and end up cloning the terminator
+ // at the end of the Dest block. Since the original Dest block will
+ // continue to exist, this means that one or the other will not be able to
+ // fall through. One typical example that this helps with is code like:
+ // if (a)
+ // foo();
+ // if (b)
+ // foo();
+ // Cloning the 'if b' block into the end of the first foo block is messy.
+
+ // The messy case is when the fall-through block falls through to other
+ // blocks. This is what we would be preventing if we cloned the block.
+ DestI = Dest;
+ if (++DestI != Dest->getParent()->end()) {
+ BasicBlock *DestSucc = DestI;
+ // If any of Dest's successors are fall-throughs, don't do this xform.
+ for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
+ SI != SE; ++SI)
+ if (*SI == DestSucc)
+ return false;
+ }
+ }
+
+ // Finally, check that we haven't redirected to this target block earlier;
+ // there are cases where we loop forever if we don't check this (PR 2323).
+ if (!CycleDetector.insert(Dest))
+ return false;
+
+ return true;
+}
+
+/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
+/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
+/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
+/// DstBlock, return it.
+static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
+ BasicBlock *DstBlock) {
+ // SrcBlock must have a single predecessor.
+ pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
+ if (PI == PE || ++PI != PE) return 0;
+
+ BasicBlock *SrcPred = *pred_begin(SrcBlock);
+
+ // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
+ // there is only one other pred, get it, otherwise we can't handle it.
+ PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
+ BasicBlock *DstOtherPred = 0;
+ BasicBlock *P = *PI;
+ if (P == SrcBlock) {
+ if (++PI == PE) return 0;
+ DstOtherPred = *PI;
+ if (++PI != PE) return 0;
+ } else {
+ DstOtherPred = P;
+ if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
+ }
+
+ // We can handle two situations here: "if then" and "if then else" blocks. An
+ // 'if then' situation is just where DstOtherPred == SrcPred.
+ if (DstOtherPred == SrcPred)
+ return SrcPred;
+
+ // Check to see if we have an "if then else" situation, which means that
+ // DstOtherPred will have a single predecessor and it will be SrcPred.
+ PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
+ if (PI != PE && *PI == SrcPred) {
+ if (++PI != PE) return 0; // Not a single pred.
+ return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
+ }
+
+ // Otherwise, this is something we can't handle.
+ return 0;
+}
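+
+// For illustration: in the "if then" shape, SrcPred branches to both SrcBlock
+// and DstBlock, and SrcBlock branches to DstBlock. In the "if then else"
+// shape, SrcPred branches to SrcBlock and DstOtherPred, and both of those
+// branch to DstBlock. In either shape SrcPred dominates SrcBlock and DstBlock,
+// so it is the block returned here.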
+
+
+/// eliminateUnconditionalBranch - Clone the instructions from the destination
+/// block into the source block, eliminating the specified unconditional branch.
+/// If the destination block defines values used by successors of the dest
+/// block, we may need to insert PHI nodes.
+///
+void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
+ BasicBlock *SourceBlock = Branch->getParent();
+ BasicBlock *DestBlock = Branch->getSuccessor(0);
+ assert(SourceBlock != DestBlock && "Our predicate is broken!");
+
+ DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+ << "]: Eliminating branch: " << *Branch);
+
+ // See if we can avoid duplicating code by moving it up to a dominator of both
+ // blocks.
+ if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
+ DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
+
+ // If there are non-phi instructions in DestBlock that have no operands
+ // defined in DestBlock, and if the instruction has no side effects, we can
+ // move the instruction to DomBlock instead of duplicating it.
+ BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
+ while (!isa<TerminatorInst>(BBI)) {
+ Instruction *I = BBI++;
+
+ bool CanHoist = I->isSafeToSpeculativelyExecute() &&
+ !I->mayReadFromMemory();
+ if (CanHoist) {
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
+ if (OpI->getParent() == DestBlock ||
+ (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
+ CanHoist = false;
+ break;
+ }
+ if (CanHoist) {
+ // Remove from DestBlock, move right before the term in DomBlock.
+ DestBlock->getInstList().remove(I);
+ DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
+ DEBUG(dbgs() << "Hoisted: " << *I);
+ }
+ }
+ }
+ }
+
+ // Tail duplication cannot update SSA properties correctly if the values
+ // defined in the duplicated tail are used outside of the tail itself. For
+ // this reason, we spill all values that are used outside of the tail to the
+ // stack.
+ for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
+ if (I->isUsedOutsideOfBlock(DestBlock)) {
+ // We found a use outside of the tail. Create a new stack slot to
+ // break this inter-block usage pattern.
+ DemoteRegToStack(*I);
+ }
+
+ // We are going to have to map operands from the original block B to the new
+ // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
+ // nodes also define part of this mapping. Loop over these PHI nodes, adding
+ // them to our mapping.
+ //
+ std::map<Value*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = DestBlock->begin();
+ bool HadPHINodes = isa<PHINode>(BI);
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
+
+ // Clone the non-phi instructions of the dest block into the source block,
+ // keeping track of the mapping...
+ //
+ for (; BI != DestBlock->end(); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ SourceBlock->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+ }
+
+ // Now that we have built the mapping information and cloned all of the
+ // instructions (giving us a new terminator, among other things), walk the new
+ // instructions, rewriting references of old instructions to use new
+ // instructions.
+ //
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ for (; BI != SourceBlock->end(); ++BI)
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ std::map<Value*, Value*>::const_iterator I =
+ ValueMapping.find(BI->getOperand(i));
+ if (I != ValueMapping.end())
+ BI->setOperand(i, I->second);
+ }
+
+ // Next we check to see if any of the successors of DestBlock had PHI nodes.
+ // If so, we need to add entries to the PHI nodes for SourceBlock now.
+ for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
+ SI != SE; ++SI) {
+ BasicBlock *Succ = *SI;
+ for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
+ PHINode *PN = cast<PHINode>(PNI);
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(DestBlock);
+
+ // Remap the value if necessary...
+ std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
+ if (I != ValueMapping.end())
+ IV = I->second;
+ PN->addIncoming(IV, SourceBlock);
+ }
+ }
+
+ // Next, remove the old branch instruction, and any PHI node entries that we
+ // had.
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
+ SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
+
+ // Final step: now that we have finished everything up, walk the cloned
+ // instructions one last time, constant propagating and DCE'ing them, because
+ // they may not be needed anymore.
+ //
+ if (HadPHINodes) {
+ while (BI != SourceBlock->end()) {
+ Instruction *Inst = BI++;
+ if (isInstructionTriviallyDead(Inst))
+ Inst->eraseFromParent();
+ else if (Value *V = SimplifyInstruction(Inst)) {
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+
+ ++NumEliminated; // We just killed a branch!
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 000000000000..e21eb9dca270
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,634 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+// 1. Trivial instructions between the call and return do not prevent the
+// transformation from taking place, though currently the analysis cannot
+// support moving any really useful instructions (only dead ones).
+// 2. This pass transforms functions that are prevented from being tail
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
+// 3. TRE is performed if the function returns void, if the return
+// returns the result returned by the call, or if the function returns a
+// run-time constant on all exits from the function. It is possible, though
+// unlikely, that the return returns something else (like constant 0), and
+// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+// the function return the exact same value.
+// 4. If it can prove that callees do not access their caller stack frame,
+// they are marked as eligible for tail call elimination (by the code
+// generator).
+//
+// There are several improvements that could be made:
+//
+// 1. If the function has any alloca instructions, these instructions will be
+// moved out of the entry block of the function, causing them to be
+// evaluated each time through the tail recursion. Safely keeping allocas
+// in the entry block requires analysis to proves that the tail-called
+// function does not read or write the stack object.
+// 2. Tail recursion is only performed if the call immediately precedes the
+// return instruction. It's possible that there could be a jump between
+// the call and the return.
+// 3. There can be intervening operations between the call and the return that
+// prevent the TRE from occurring. For example, there could be GEP's and
+// stores to memory that will not be read or written by the call. This
+// requires some substantial analysis (such as with DSA) to prove safe to
+// move ahead of the call, but doing so could allow many more TREs to be
+// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+// 4. The algorithm we use to detect if callees access their caller stack
+// frames is very primitive.
+//
+//===----------------------------------------------------------------------===//
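+
+// As a rough before/after sketch of extension #2 (C-level, illustrative):
+//   int fact(int n) { return n <= 1 ? 1 : n * fact(n - 1); }
+// is not literally tail recursive because of the multiply after the call, but
+// introducing an accumulator (initialized from the base-case return value)
+// lets the multiply become a loop-carried update, turning the recursion into
+// a simple loop.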
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+ struct TailCallElim : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+ Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+ };
+}
+
+char TailCallElim::ID = 0;
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now; this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+ // FIXME: do simple 'address taken' analysis.
+ return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+ bool &CannotTCETailMarkedCall) {
+ bool RetVal = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ RetVal |= AllocaMightEscapeToCalls(AI);
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ if (BB != &BB->getParent()->getEntryBlock() ||
+ !isa<ConstantInt>(AI->getArraySize()))
+ CannotTCETailMarkedCall = true;
+ }
+ return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg()) return false;
+
+ BasicBlock *OldEntry = 0;
+ bool TailCallsAreMarkedTail = false;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
+ bool MadeChange = false;
+ bool FunctionContainsEscapingAllocas = false;
+
+ // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // marked with the 'tail' attribute, because doing so would cause the stack
+ // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // doesn't).
+ bool CannotTCETailMarkedCall = false;
+
+ // Loop over the function, looking for any returning blocks, and keeping track
+ // of whether this function has any non-trivially used allocas.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+ break;
+
+ FunctionContainsEscapingAllocas |=
+ CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ }
+
+ /// FIXME: The code generator produces really bad code when an 'escaping
+ /// alloca' is changed from being a static alloca to being a dynamic alloca.
+ /// Until this is resolved, disable this transformation if that would ever
+ /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas)
+ return false;
+
+ // Second pass, change any tail calls to loops.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ if (!ArgumentPHIs.empty()) {
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
+
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // Finally, if this function contains no escaping allocas and does not call
+ // setjmp, mark all calls in the function as eligible for tail calls
+ // (there is no stack memory for them to access).
+ if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+ // FIXME: We can move load/store/call/free instructions above the call if the
+ // call does not mod/ref the memory location being processed.
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
+ return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads can always be moved above a call that has no side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
+ L->getAlignment()))
+ return false;
+ }
+ }
+
+ // Otherwise, if this is a side-effect free instruction, check to make sure
+ // that it does not use the return value of the call. If it doesn't use the
+ // return value of the call, it must only use things that are defined before
+ // the call, or movable instructions between the call and the instruction
+ // itself.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == CI)
+ return false;
+ return true;
+}
+
+// isDynamicConstant - Return true if the specified value will be the same when
+// the return executes as it was when the initial invocation of the recursive
+// function began.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
+ if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+ // Check to see if this is an immutable argument; if so, the value
+ // will be available to initialize the accumulator.
+ if (Argument *Arg = dyn_cast<Argument>(V)) {
+ // Figure out which argument number this is...
+ unsigned ArgNo = 0;
+ Function *F = CI->getParent()->getParent();
+ for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+ ++ArgNo;
+
+ // If we are passing this argument into the call as the corresponding
+ // argument operand, then the argument is dynamically constant.
+ // Otherwise, we cannot transform this function safely.
+ if (CI->getArgOperand(ArgNo) == Arg)
+ return true;
+ }
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
+ // Not a constant or immutable argument, we can't safely transform.
+ return false;
+}
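+
+// For example (an illustrative function, not from the sources above):
+//   int f(int n, int acc) { if (n == 0) return acc; return f(n - 1, acc); }
+// 'acc' is forwarded unchanged as the same argument operand of the recursive
+// call, so it is dynamically constant even though it is not a compile-time
+// constant.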
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
+ Value *ReturnedValue = 0;
+
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
+ return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+ CallInst *CI) {
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
+ assert(I->getNumOperands() == 2 &&
+ "Associative/commutative operations should have 2 args!");
+
+ // Exactly one operand should be the result of the call instruction.
+ if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+ (I->getOperand(0) != CI && I->getOperand(1) != CI))
+ return 0;
+
+ // The only user of this instruction we allow is a single return instruction.
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ return 0;
+
+ // Ok, now we have to check all of the other return instructions in this
+ // function. If they return non-constants or differing values, then we cannot
+ // transform the function safely.
+ return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
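+
+// For example, in the 'n * fact(n - 1)' return expression of a naive
+// factorial, the multiply is associative and commutative, has the recursive
+// call as exactly one operand, and is used only by the return, so it becomes
+// the accumulating operation; the constant returned by the base case becomes
+// the accumulator's initial value.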
+
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
+ Function *F = BB->getParent();
+
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
+
+ // Scan backwards from the return, checking to see if there is a tail call in
+ // this block. If so, set CI to it.
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
+ CI = dyn_cast<CallInst>(BBI);
+ if (CI && CI->getCalledFunction() == F)
+ break;
+
+ if (BBI == BB->begin())
+ return 0; // Didn't find a potential tail call.
+ --BBI;
+ }
+
+ // If this call is marked as a tail call, and if there are dynamic allocas in
+ // the function, we cannot perform this optimization.
+ if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+ return 0;
+
+ // As a special case, detect code like this:
+ // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+ // and disable this xform in this case, because the code generator will
+ // lower the call to fabs into inline code.
+ if (BB == &F->getEntryBlock() &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
+ callIsSmall(F)) {
+ // A single-block function with just a call and a return. Check that
+ // the arguments match.
+ CallSite::arg_iterator I = CallSite(CI).arg_begin(),
+ E = CallSite(CI).arg_end();
+ Function::arg_iterator FI = F->arg_begin(),
+ FE = F->arg_end();
+ for (; I != E && FI != FE; ++I, ++FI)
+ if (*I != &*FI) break;
+ if (I == E && FI == FE)
+ return 0;
+ }
+
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+ // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+ // which is different to the constant returned by other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
+ Value *AccumulatorRecursionEliminationInitVal = 0;
+ Instruction *AccumulatorRecursionInstr = 0;
+
+ // Ok, we found a potential tail call. We can currently only transform the
+ // tail call if all of the instructions between the call and the return are
+ // movable to above the call itself, leaving the call next to the return.
+ // Check that this is the case now.
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
+ }
+ }
+
+ // We can only transform call/return pairs that either ignore the return value
+ // of the call and return void, ignore the value of the call and return a
+ // constant, return the value returned by the tail call, or that are handled
+ // by accumulator recursion elimination.
+ if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+ !isa<UndefValue>(Ret->getReturnValue()) &&
+ AccumulatorRecursionEliminationInitVal == 0 &&
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
+
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
+ // OK! We can transform this tail call. If this is the first one found,
+ // create the new entry block, allowing us to branch back to the old entry.
+ if (OldEntry == 0) {
+ OldEntry = &F->getEntryBlock();
+ BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
+ NewEntry->takeName(OldEntry);
+ OldEntry->setName("tailrecurse");
+ BranchInst::Create(OldEntry, NewEntry);
+
+ // If this tail call is marked 'tail' and if there are any allocas in the
+ // entry block, move them up to the new entry block.
+ TailCallsAreMarkedTail = CI->isTailCall();
+ if (TailCallsAreMarkedTail)
+ // Move all fixed sized allocas from OldEntry to NewEntry.
+ for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+ NEBI = NewEntry->begin(); OEBI != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(NEBI);
+
+ // Now that we have created a new block, which jumps to the entry
+ // block, insert a PHI node for each argument of the function.
+ // For now, we initialize each PHI to only have the real arguments
+ // which are passed in.
+ Instruction *InsertPos = OldEntry->begin();
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ PHINode *PN = PHINode::Create(I->getType(), 2,
+ I->getName() + ".tr", InsertPos);
+ I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+ PN->addIncoming(I, NewEntry);
+ ArgumentPHIs.push_back(PN);
+ }
+ }
+
+ // If this function has self recursive calls in the tail position where some
+ // are marked tail and some are not, only transform one flavor or another. We
+ // have to choose whether we move allocas in the entry block to the new entry
+ // block or not, so we can't make a good choice for both. NOTE: We could do
+ // slightly better here in the case that the function has no entry block
+ // allocas.
+ if (TailCallsAreMarkedTail && !CI->isTailCall())
+ return false;
+
+ // Ok, now that we know we have a pseudo-entry block WITH all of the
+ // required PHI nodes, add entries into the PHI node for the actual
+ // parameters passed into the tail-recursive call.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
+
+ // If we are introducing an accumulator variable to eliminate the recursion,
+ // do so now. Note that we _know_ that no subsequent tail recursion
+ // eliminations will happen on this function because of the way the
+ // accumulator recursion predicate is set up.
+ //
+ if (AccumulatorRecursionEliminationInitVal) {
+ Instruction *AccRecInstr = AccumulatorRecursionInstr;
+ // Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
+ "accumulator.tr", OldEntry->begin());
+
+ // Loop over all of the predecessors of the tail recursion block. For the
+ // real entry into the function we seed the PHI with the initial value,
+ // computed earlier. For any other existing branches to this block (due to
+ // other tail recursions eliminated) the accumulator is not modified.
+ // Because we haven't added the branch in the current block to OldEntry yet,
+ // it will not show up as a predecessor.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
+ else
+ AccPN->addIncoming(AccPN, P);
+ }
+
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
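+ // The operand index of AccRecInstr that is the call is (getOperand(0) != CI):
+ // if operand 0 is not the call, the call must be operand 1.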
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
+
+ // Finally, rewrite any return instructions in the program to return the PHI
+ // node instead of the "initval" that they do currently. This loop will
+ // actually rewrite the return value we are destroying, but that's ok.
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ RI->setOperand(0, AccPN);
+ ++NumAccumAdded;
+ }
+
+ // Now that all of the PHI nodes are in place, remove the call and
+ // ret instructions, replacing them with an unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
+ BB->getInstList().erase(Ret); // Remove return.
+ BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated;
+ return true;
+}
+
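+/// FoldReturnAndProcessPred - If the block BB ending in Ret is reached from
+/// predecessors that end in unconditional branches, duplicate the return into
+/// those predecessors and try to eliminate any tail-recursive calls exposed
+/// there. Returns true if anything changed.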
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
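+/// ProcessReturningBlock - Look for a tail-recursive call feeding the return
+/// instruction Ret and, if one is found, eliminate it. Returns true if the
+/// block was transformed.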
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
new file mode 100644
index 000000000000..be7bed1cecdf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -0,0 +1,582 @@
+//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target addressing mode matcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
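+ // A left shift by C scales the index by 1 << C.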
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check whether it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const TargetData *TD = TLI.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live; this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ BasicBlock *MemBB = MemoryInst->getParent();
+ for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+ UI != E; ++UI)
+ // We know that uses of arguments and instructions have to be instructions.
+ if (cast<Instruction>(*UI)->getParent() == MemBB)
+ return true;
+
+ return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ const Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 000000000000..b4f74f97e978
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,550 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constant.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, all uses will eventually
+ // be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ AliasAnalysis *AA = 0;
+ MemoryDependenceAnalysis *MemDep = 0;
+ if (P) {
+ AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+ MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+ else if (AA && isa<PointerType>(PN->getType()))
+ AA->deleteValue(PN);
+
+ PN->eraseFromParent();
+ }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ bool Changed = false;
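+ // Earlier deletions may have nulled out or RAUW'd some of the collected
+ // handles, so re-check each one before using it.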
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+ return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+ // Don't merge away blocks who have their address taken.
+ if (BB->hasAddressTaken()) return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break invokes.
+ if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock *OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+ // Can't merge if there is a PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, P);
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // If the predecessor block has no name, inherit BB's name.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (P) {
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ DE = Children.end(); DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ LI->removeBlock(BB);
+
+ if (MemoryDependenceAnalysis *MD =
+ P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+ MD->invalidateCachedPredecessors();
+ }
+ }
+
+ BB->eraseFromParent();
+ return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I. The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == 0 &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
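+ // Not reached when the precondition holds; the loop above always returns.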
+ return 0;
+}
+
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ BasicBlock::iterator SplitPoint;
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = NULL;
+ return SplitBlock(Succ, Succ->begin(), P);
+ }
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
+}
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with SplitPt
+/// moves to a new block. The two blocks are joined by an unconditional branch
+/// and the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+ BasicBlock::iterator SplitIt = SplitPt;
+ while (isa<PHINode>(SplitIt))
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ DomTreeNode *OldNode = DT->getNode(Old);
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+
+ return New;
+}
+
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+ Loop *L = LI ? LI->getLoopFor(BB) : 0;
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ // While here, if we need to preserve loop analyses, collect
+ // some information about how this split will affect loops.
+ bool HasLoopExit = false;
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+
+ if (LI) {
+ // If we need to preserve LCSSA, determine if any of
+ // the preds is a loop exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Preds[i]))
+ if (!PL->contains(BB))
+ HasLoopExit = true;
+ // If we need to preserve LoopInfo, note whether any of the
+ // preds crosses an interesting loop boundary.
+ if (L) {
+ if (L->contains(Preds[i]))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+ }
+
+ // Update dominator tree if available.
+ DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+ if (L) {
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an
+ // adjacent loop). To find this, examine each of the predecessors and
+ // determine which loops enclose them, and select the most-nested loop
+ // which contains the loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (unsigned i = 0; i != NumPreds; ++i)
+ if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
+ // Seek a loop which actually contains the block being split (to
+ // avoid adjacent loops).
+ while (PredLoop && !PredLoop->contains(BB))
+ PredLoop = PredLoop->getParentLoop();
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop &&
+ PredLoop->contains(BB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+ }
+
+ // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1; i != NumPreds; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0; i != NumPreds; ++i)
+ PN->removeIncomingValue(Preds[i], false);
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+ InVal = NewPHI;
+ }
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ return NewBB;
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
+}
+
+/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a
+/// given basic block.
+DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
+ if (const Instruction *I = BB->getFirstNonPHI())
+ return I->getDebugLoc();
+ // Scanning the entire block may be too expensive if the first instruction
+ // does not have valid location info.
+ return DebugLoc();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
new file mode 100644
index 000000000000..23a30cc58507
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
@@ -0,0 +1,182 @@
+//===- BasicInliner.cpp - Basic function level inliner --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple function based inliner that does not use
+// call graph information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basicinliner"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/BasicInliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
+ cl::desc("Control the amount of basic inlining to perform (default = 200)"));
+
+namespace llvm {
+
+ /// BasicInlinerImpl - BasicInliner implementation class. This hides
+ /// container info, used by basic inliner, from public interface.
+ struct BasicInlinerImpl {
+
+ BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ void operator=(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ public:
+ BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+ /// addFunction - Add function into the list of functions to process.
+ /// All functions must be inserted using this interface before invoking
+ /// inlineFunctions().
+ void addFunction(Function *F) {
+ Functions.push_back(F);
+ }
+
+ /// neverInlineFunction - Sometimes a function is never to be inlined
+ /// because of one reason or another.
+ void neverInlineFunction(Function *F) {
+ NeverInline.insert(F);
+ }
+
+ /// inlineFunctions - Walk all call sites in all functions supplied by
+ /// client. Inline as many call sites as possible. Delete completely
+ /// inlined functions.
+ void inlineFunctions();
+
+ private:
+ TargetData *TD;
+ std::vector<Function *> Functions;
+ SmallPtrSet<const Function *, 16> NeverInline;
+ SmallPtrSet<Function *, 8> DeadFunctions;
+ InlineCostAnalyzer CA;
+ };
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ std::vector<CallSite> CallSites;
+
+ for (std::vector<Function *>::iterator FI = Functions.begin(),
+ FE = Functions.end(); FI != FE; ++FI) {
+ Function *F = *FI;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()
+ && !CS.getCalledFunction()->isDeclaration())
+ CallSites.push_back(CS);
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // Inline call sites.
+ bool Changed = false;
+ do {
+ Changed = false;
+ for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+ ++index) {
+ CallSite CS = CallSites[index];
+ if (Function *Callee = CS.getCalledFunction()) {
+
+ // Eliminate calls that are never inlinable.
+ if (Callee->isDeclaration() ||
+ CS.getInstruction()->getParent()->getParent() == Callee) {
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ continue;
+ }
+ InlineCost IC = CA.getInlineCost(CS, NeverInline);
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ <<", call: " << *CS.getInstruction());
+ } else if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ <<", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ int Cost = IC.getValue();
+
+ if (Cost >= (int) BasicInlineThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ DEBUG(dbgs() << " Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ }
+ }
+
+ // Inline
+ InlineFunctionInfo IFI(0, TD);
+ if (InlineFunction(CS, IFI)) {
+ if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()))
+ DeadFunctions.insert(Callee);
+ Changed = true;
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ }
+ }
+ }
+ } while (Changed);
+
+ // Remove completely inlined functions from module.
+ for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
+ E = DeadFunctions.end(); I != E; ++I) {
+ Function *D = *I;
+ Module *M = D->getParent();
+ M->getFunctionList().remove(D);
+ }
+}
+
+BasicInliner::BasicInliner(TargetData *TD) {
+ Impl = new BasicInlinerImpl(TD);
+}
+
+BasicInliner::~BasicInliner() {
+ delete Impl;
+}
+
+/// addFunction - Add function into the list of functions to process.
+/// All functions must be inserted using this interface before invoking
+/// inlineFunctions().
+void BasicInliner::addFunction(Function *F) {
+ Impl->addFunction(F);
+}
+
+/// neverInlineFunction - Sometimes a function is never to be inlined because
+/// of one reason or another.
+void BasicInliner::neverInlineFunction(Function *F) {
+ Impl->neverInlineFunction(F);
+}
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInliner::inlineFunctions() {
+ Impl->inlineFunctions();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 000000000000..92ce50030a5d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,386 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ProfileInfo>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, this)) {
+ ++NumBroken;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
+ return true;
+    // Note: leave this as-is until nobody compiles with gcc 4.0.1 or Xcode 2
+    // any more; it seems to work around the pred_iterator assert in PR 2207.
+ E = pred_end(P);
+ ++I;
+ }
+ return false;
+}
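+
+// Illustrative example: in the CFG sketched below, %a has two successors and
+// %c has two predecessors, so the edge %a -> %c is critical, while %a -> %b
+// and %b -> %c are not.
+//
+//      %a
+//     /  \
+//   %b    |
+//     \   |
+//      \  |
+//       %c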
+
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block...
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
+ SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This updates any DominatorTree, LoopInfo, and
+/// ProfileInfo held by the pass P, so calling this function does not
+/// invalidate them. It returns the new block if the edge was split, null
+/// otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P, bool MergeIdenticalEdges) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+ NewBI->setDebugLoc(TI->getDebugLoc());
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB;
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+
+
+ // If we don't have a pass object, we can't update anything...
+ if (P == 0) return NewBB;
+
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+
+ // If we have nothing to update, just return.
+ if (DT == 0 && LI == 0 && PI == 0)
+ return NewBB;
+
+  // Now update analysis information. Since the only predecessor of NewBB is
+  // TIBB, TIBB clearly dominates NewBB. NewBB usually doesn't dominate
+  // anything else, as DestBB typically has other predecessors. However, if
+  // all other predecessors of DestBB are already dominated by DestBB (e.g.
+  // DestBB is a loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
+ }
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DT) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = 0;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+      // If either block was not in a loop, the new block is not in a loop
+      // either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+          // Both blocks are in the same loop; NewBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ }
+ // If TIBB is in a loop and DestBB is outside of that loop, split the
+ // other exit blocks of the loop that also have predecessors outside
+ // the loop, to maintain a LoopSimplify guarantee.
+ if (!TIL->contains(DestBB) &&
+ P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (P->mustPreserveAnalysisID(LCSSAID)) {
+ SmallVector<BasicBlock *, 1> OrigPred;
+ OrigPred.push_back(TIBB);
+ CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+ }
+
+ // For each unique exit block...
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ TIL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ // Collect all the preds that are inside the loop, and note
+ // whether there are any preds outside the loop.
+ SmallVector<BasicBlock *, 4> Preds;
+ bool HasPredOutsideOfLoop = false;
+ BasicBlock *Exit = ExitBlocks[i];
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (TIL->contains(P))
+ Preds.push_back(P);
+ else
+ HasPredOutsideOfLoop = true;
+ }
+ // If there are any preds not in the loop, we'll need to split
+ // the edges. The Preds.empty() check is needed because a block
+ // may appear multiple times in the list. We can't use
+ // getUniqueExitBlocks above because that depends on LoopSimplify
+ // form, which we're in the process of restoring!
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+ "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCCSA form "
+ "without LoopSimplify!");
+ }
+ }
+
+ // Update ProfileInfo if it is around.
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
+
+ return NewBB;
+}
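+
+// Minimal usage sketch (illustrative; TI, SuccNum, and P stand for a
+// terminator, a successor index, and the client Pass, all supplied by the
+// caller):
+//
+//   if (BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P)) {
+//     // NewBB contains only an unconditional branch to the old successor,
+//     // and any DominatorTree/LoopInfo/ProfileInfo held by P has been kept
+//     // up to date.
+//   }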
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..14bb17fbda3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,479 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
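+
+// Usage sketch (illustrative; CI names a CallInst being simplified and TD a
+// TargetData known to the caller):
+//
+//   IRBuilder<> B(CI);
+//   Value *Len = EmitStrLen(CI->getArgOperand(0), B, TD);
+//   // Len has intptr_t width and holds the result of the emitted strlen call.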
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI =
+ AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ const Type *I8Ptr = B.getInt8PtrTy();
+ const Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+ CastToCStr(Ptr2, B), Len, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr, NULL);
+ CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Name);
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+ IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr,
+ Len->getType(), NULL);
+ CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Len, "strncpy");
+ if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
+/// are pointers.
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). The function is known to take a single argument of type matching
+/// 'Op' and to return one value of the same type. If 'Op' is a long double,
+/// an 'l' suffix is added to the name; if 'Op' is a float, an 'f' suffix is
+/// added.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B, const AttrListPtr &Attrs) {
+ char NameBuffer[20];
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType()->isFloatTy())
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
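+
+// Illustrative behaviour (Attrs is whatever attribute list the caller wants
+// on the emitted call):
+//
+//   Value *R = EmitUnaryFloatFnCall(Op, "floor", B, Attrs);
+//
+// This emits a call to floor when Op has double type, to floorf when it has
+// float type, and to floorl otherwise.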
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
+
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+ // We really need TargetData for later.
+ if (!TD) return false;
+
+ this->CI = CI;
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ const FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+ IRBuilder<> B(CI);
+
+ if (Name == "__memcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+  // __mempcpy_chk is not handled yet; it should be folded much like
+  // __memcpy_chk above.
+ if (Name == "__mempcpy_chk") {
+ return false;
+ }
+
+ if (Name == "__memmove_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__memset_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+      return false;
+
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+    // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
+    // TODO: It might be nice to compute a maximum length from the possible
+    // string lengths when they vary.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ Name.substr(2, 6));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcat_chk") {
+ return false;
+ }
+
+ if (Name == "__strncat_chk") {
+ return false;
+ }
+
+ return false;
+}
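+
+// Illustrative fold performed above for __memcpy_chk when isFoldable() proves
+// the copy fits in the destination object (the IR shown is approximate):
+//
+//   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 16, i64 64)
+//
+// becomes a plain memcpy intrinsic, with uses of %r replaced by %dst:
+//
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16,
+//                                        i32 1, i1 false)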
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 000000000000..6ea831f5345b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,538 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+ ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo) {
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ assert(VMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+ // Clone any attributes.
+ if (NewFunc->arg_size() == OldFunc->arg_size())
+ NewFunc->copyAttributesFrom(OldFunc);
+ else {
+    // Some arguments were deleted with the VMap. Copy the attributes over one
+    // argument at a time.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
+ Anew->addAttr( OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(0, OldFunc->getAttributes()
+ .getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(~0, OldFunc->getAttributes()
+ .getFnAttributes()));
+
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+ VMap[&BB] = CBB; // Add basic block mapping.
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
+/// updated to include mappings from all of the instructions and basicblocks in
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type*> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
+ //
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I->getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) { // Is this argument preserved?
+ DestI->setName(I->getName()); // Copy the name over...
+ VMap[I] = DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+ return NewF;
+}
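+
+// Usage sketch (illustrative; F is any function the client wants duplicated):
+//
+//   ValueToValueMapTy VMap;
+//   Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
+//   // NewF is not inserted into any module; a common follow-up is:
+//   F->getParent()->getFunctionList().push_back(NewF);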
+
+
+
+namespace {
+ /// PruningFunctionCloner - This class is a private class used to implement
+ /// the CloneAndPruneFunctionInto method.
+ struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ SmallVectorImpl<ReturnInst*> &Returns;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ const TargetData *TD;
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &returns,
+ const char *nameSuffix,
+ ClonedCodeInfo *codeInfo,
+ const TargetData *td)
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ }
+
+ /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone);
+
+ public:
+ /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+ /// mapping its operands through VMap if they are available.
+ Constant *ConstantFoldMappedInstruction(const Instruction *I);
+ };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ TrackingVH<Value> &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+ // If this instruction constant folds, don't bother cloning the instruction,
+ // instead, just add the constant to the value map.
+ if (Constant *C = ConstantFoldMappedInstruction(II)) {
+ VMap[II] = C;
+ continue;
+ }
+
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
+ Returns.push_back(RI);
+}
+
+/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+/// mapping its operands through VMap if they are available.
+Constant *PruningFunctionCloner::
+ConstantFoldMappedInstruction(const Instruction *I) {
+ SmallVector<Constant*, 8> Ops;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
+ CE);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
+ Ops.size(), TD);
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const TargetData *TD,
+ Instruction *TheCall) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Loop over all of the instructions in the block, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ //
+ BasicBlock::iterator I = NewBB->begin();
+
+ DebugLoc TheCallDL;
+ if (TheCall)
+ TheCallDL = TheCall->getDebugLoc();
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // Skip over all PHI nodes, remembering them for later.
+ BasicBlock::const_iterator OldI = BI->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
+
+ // Otherwise, remap the rest of the instructions normally.
+ for (; I != NewBB->end(); ++I)
+ RemapInstruction(I, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred, --e; // Revisit the next entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ // NOTE: We cannot eliminate single entry phi nodes here, because of
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
+ // to update any entries in it that would require that. This would be
+ // really slow.
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+  // and zap unconditional fall-through branches. This happens all the time
+  // when specializing code: code specialization turns conditional branches
+  // into uncond branches, and this code folds them.
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ // Note that we can't eliminate uncond branches if the destination has
+ // single-entry PHI nodes. Eliminating the single-entry phi nodes would
+ // require scanning the VMap to update any entries that point to the phi
+ // node.
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ ++I; continue;
+ }
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+}
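+
+// Illustrative effect of the pruning (the IR is approximate): if the callee is
+//
+//   define i32 @f(i1 %flag) {
+//   entry:
+//     br i1 %flag, label %a, label %b
+//   a:  ret i32 1
+//   b:  ret i32 0
+//   }
+//
+// and it is inlined at a call site where %flag maps to the constant true, the
+// conditional branch is folded, block %b is never cloned, and the remaining
+// unconditional branch from the cloned entry block is merged away by the loop
+// directly above.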
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..a08fa35065cc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,127 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constant.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module. This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their (initializers and references,
+/// respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
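+
+// Usage sketch (illustrative):
+//
+//   Module *Copy = CloneModule(M);  // Deep copy; the caller owns it.
+//   ...
+//   delete Copy;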
+
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module.
+ Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Copy all of the dependent libraries over.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ New->addLibrary(*I);
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getType()->getElementType(),
+ false,
+ GlobalValue::ExternalLinkage, 0,
+ I->getName());
+ GV->setAlignment(I->getAlignment());
+ VMap[I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ GlobalValue::ExternalLinkage, I->getName(), New);
+ NF->copyAttributesFrom(I);
+ VMap[I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ I->getName(), NULL, New);
+
+  // Now that all of the things that a global variable initializer can refer
+  // to have been created, loop through and copy the global variable
+  // initializers over... We also set the attributes on the globals now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ if (I->hasInitializer())
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
+ GV->setLinkage(I->getLinkage());
+ GV->setThreadLocal(I->isThreadLocal());
+ GV->setConstant(I->isConstant());
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *F = cast<Function>(VMap[I]);
+ if (!I->isDeclaration()) {
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[J] = DestI++;
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ }
+
+ F->setLinkage(I->getLinkage());
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ GA->setLinkage(I->getLinkage());
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
+ }
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
+ }
+
+ return New;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..081352358b95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,795 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
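+
+// For illustration (types are hypothetical): with aggregation enabled, an
+// extracted function that would otherwise be called as f(i32 %a, i32* %b.out)
+// instead receives a single { i32, i32 }* %structArg, and the output value is
+// written back into the second field before each return.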
+
+namespace {
+ class CodeExtractor {
+ typedef SetVector<Value*> Values;
+ SetVector<BasicBlock*> BlocksToExtract;
+ DominatorTree* DT;
+ bool AggregateArgs;
+ unsigned NumExitBlocks;
+ const Type *RetTy;
+ public:
+ CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+ : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+ Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+ bool isEligible(const std::vector<BasicBlock*> &code);
+
+ private:
+ /// definedInRegion - Return true if the specified value is defined in the
+ /// extracted region.
+ bool definedInRegion(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ /// definedInCaller - Return true if the specified value is defined in the
+ /// function being code extracted, but not in the region being extracted.
+ /// These values must be passed in as live-ins to the function.
+ bool definedInCaller(Value *V) const {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ void severSplitPHINodes(BasicBlock *&Header);
+ void splitReturnBlocks();
+ void findInputsOutputs(Values &inputs, Values &outputs);
+
+ Function *constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode, BasicBlock *newHeader,
+ Function *oldFunction, Module *M);
+
+ void moveCodeToFunction(Function *newFunction);
+
+ void emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ Values &inputs,
+ Values &outputs);
+
+ };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
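+///
+/// For example (names are illustrative): if the region header %H has PHIs fed
+/// by two outside blocks %A and %B, %H is split so that the original block
+/// stays outside the region holding those PHIs, while the new block %H.ce
+/// becomes the region's header.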
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ BlocksToExtract.remove(OldPred);
+ BlocksToExtract.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, update dominator sets. The blocks that dominate the new one are the
+ // blocks that dominate the old header plus the new block itself.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Okay, everything within the region is now branching to the right block; we
+ // just have to update the PHI nodes, inserting new PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName()+".ce", NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+ E = BlocksToExtract.end(); I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(*I);
+ SmallVector<DomTreeNode*, 8> Children;
+ for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+ DI != DE; ++DI)
+ Children.push_back(*DI);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+ for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+ std::set<BasicBlock*> ExitBlocks;
+ for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+ ce = BlocksToExtract.end(); ci != ce; ++ci) {
+ BasicBlock *BB = *ci;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+ if (definedInCaller(*O))
+ inputs.insert(*O);
+
+ // Consider uses of this instruction (outputs).
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (!definedInRegion(*UI)) {
+ outputs.insert(I);
+ break;
+ }
+ } // for: insts
+
+ // Keep track of the exit blocks from the region.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i)))
+ ExitBlocks.insert(TI->getSuccessor(i));
+ } // for: basic blocks
+
+ NumExitBlocks = ExitBlocks.size();
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // This function returns an unsigned value; outputs go back by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (Values::const_iterator i = inputs.begin(),
+ e = inputs.end(); i != e; ++i) {
+ const Value *value = *i;
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
+
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ const FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow(true);
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, Idx+2,
+ "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // If the BasicBlock containing the branch is not in the region,
+ // modify the branch target to point at the new header block.
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!BlocksToExtract.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Value::use_iterator UI = Used->use_begin(),
+ UE = Used->use_end(); UI != UE; ++UI) {
+ PHINode *P = dyn_cast<PHINode>(*UI);
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(UI);
+ }
+
+ return 0;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side: it emits
+/// the call to the new function, reloads the output values, and builds the
+/// switch that dispatches on the region's exit number.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ Values &inputs, Values &outputs) {
+ // Emit a call to the new function, passing in: a pointer to the struct (if
+ // aggregating parameters), or the plain inputs and allocas for the outputs.
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
+
+ // Add inputs as params, or to be filled into the struct
+ for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs
+ for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<Type*> ArgTypes;
+ for (Values::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned value and switch on that number. This loop
+ // iterates over all of the blocks in the extracted region, updating any
+ // terminator instructions that branch to blocks outside of the region so
+ // that they target the corresponding exit stubs instead.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this is a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors of the block containing the switch, which means
+ // that previously this was the last part of the function, and hence this
+ // should be rewritten as a `ret'.
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have code extracted an unwind or something, just
+ // return whatever we want.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setOperand(0, call);
+ TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+ TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(*i);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(*i);
+ }
+}
+
+/// ExtractCodeRegion - Removes a code region from a function, replacing it
+/// with a call to a new function. Returns a pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+/// add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ if (!isEligible(code))
+ return 0;
+
+ // 1) Find inputs, outputs
+ // 2) Construct new function
+ // * Add allocas for defs, pass as args by reference
+ // * Pass in uses as args
+ // 3) Move code region, add call instr to func
+ //
+ BlocksToExtract.insert(code.begin(), code.end());
+
+ Values inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = code[0];
+
+ for (unsigned i = 1, e = code.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+ PI != E; ++PI)
+ assert(BlocksToExtract.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This block takes the place of the original code region.
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
+
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+ // Deny code region if it contains allocas or vastarts.
+ for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ BB != e; ++BB)
+ for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+ I != Ie; ++I)
+ if (isa<AllocaInst>(*I))
+ return false;
+ else if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+ const std::vector<BasicBlock*> &code,
+ bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+ std::vector<BasicBlock*> Blocks;
+ Blocks.push_back(BB);
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
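+
+// Illustrative use of the entry points above (a sketch under the assumption
+// that the caller already has a DominatorTree DT and a Loop *L):
+//
+//   Function *Outlined = llvm::ExtractLoop(DT, L, /*AggregateArgs=*/false);
+//   if (Outlined) {
+//     // The loop body now lives in Outlined; the original function calls it.
+//   }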
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..8cc26492c292
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,146 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack(). This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted. After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
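+/// For illustration (Inst is a hypothetical instruction the caller owns):
+///
+///   AllocaInst *Slot =
+///     DemoteRegToStack(*Inst, /*VolatileLoads=*/false, /*AllocaPoint=*/0);
+///   // Former uses of Inst now load from Slot, so the CFG can be rearranged
+///   // without invalidating SSA form.
+///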
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot
+ // instead.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead, insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node, we cannot insert multiple loads. The problem is that the resultant
+ // PHI node would have multiple values (one from each load) coming in from
+ // the same block, which is illegal SSA form. For this reason, we keep
+ // track of and reuse the loads we insert.
+ std::map<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert a store of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, though, because we can't insert
+ // the store AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical.
+ InvokeInst &II = cast<InvokeInst>(I);
+ assert(II.getNormalDest()->getSinglePredecessor() &&
+ "Cannot demote invoke with a critical successor!");
+ InsertPt = II.getNormalDest()->begin();
+ }
+
+ for (; isa<PHINode>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before any PHI nodes.
+ new StoreInst(&I, Slot, InsertPt);
+
+ return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand, insert store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert load in place of the phi and replace all uses.
+ Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+ P->replaceAllUsesWith(V);
+
+ // Delete phi.
+ P->eraseFromParent();
+
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..d5b382e55e5c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,1182 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+// The code in this file for handling inlines through invoke
+// instructions preserves semantics only under some assumptions about
+// the behavior of unwinders which correspond to gcc-style libUnwind
+// exception personality functions. Eventually the IR will be
+// improved to make this unnecessary, but until then, this code is
+// marked [LIBUNWIND].
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(CI), IFI);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(II), IFI);
+}
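+
+// A sketch of typical caller-side usage (the InlineFunctionInfo constructor
+// arguments shown here are assumptions; only the CG and TD members are used
+// in this file):
+//
+//   InlineFunctionInfo IFI(/*CG=*/0, /*TD=*/0);
+//   if (InlineFunction(CI, IFI))
+//     ;  // CI's callee has been spliced into the caller at the call site.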
+
+/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
+static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
+ for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
+ EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
+ if (exn) return exn;
+ }
+
+ return 0;
+}
+
+/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
+/// the given llvm.eh.exception call.
+static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
+ BasicBlock *exnBlock = exn->getParent();
+
+ EHSelectorInst *outOfBlockSelector = 0;
+ for (Instruction::use_iterator
+ ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
+ EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
+ if (!sel) continue;
+
+ // Immediately accept an eh.selector in the same block as the
+ // exception call.
+ if (sel->getParent() == exnBlock) return sel;
+
+ // Otherwise, use the first selector we see.
+ if (!outOfBlockSelector) outOfBlockSelector = sel;
+ }
+
+ return outOfBlockSelector;
+}
+
+/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
+/// in the given landing pad. In principle, llvm.eh.exception is
+/// required to be in the landing pad; in practice, SplitCriticalEdge
+/// can break that invariant, and then inlining can break it further.
+/// There's a real need for a reliable solution here, but until one
+/// exists, we rely on some fragile workarounds.
+static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
+ // Look for an exception call in the actual landing pad.
+ EHExceptionInst *exn = findExceptionInBlock(lpad);
+ if (exn) return findSelectorForException(exn);
+
+ // Okay, if that failed, look for one in an obvious successor. If
+ // we find one, we'll fix the IR by moving things back to the
+ // landing pad.
+
+ bool dominates = true; // does the lpad dominate the exn call
+ BasicBlock *nonDominated = 0; // if not, the first non-dominated block
+ BasicBlock *lastDominated = 0; // and the block which branched to it
+
+ BasicBlock *exnBlock = lpad;
+
+ // We need to protect against lpads that lead into infinite loops.
+ SmallPtrSet<BasicBlock*,4> visited;
+ visited.insert(exnBlock);
+
+ do {
+ // We're not going to apply this hack to anything more complicated
+ // than a series of unconditional branches, so if the block
+ // doesn't terminate in an unconditional branch, just fail. More
+ // complicated cases can arise when, say, sinking a call into a
+ // split unwind edge and then inlining it; but that can do almost
+ // *anything* to the CFG, including leaving the selector
+ // completely unreachable. The only way to fix that properly is
+ // to (1) prohibit transforms which move the exception or selector
+ // values away from the landing pad, e.g. by producing them with
+ // instructions that are pinned to an edge like a phi, or
+ // producing them with not-really-instructions, and (2) making
+ // transforms which split edges deal with that.
+ BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
+ if (!branch || branch->isConditional()) return 0;
+
+ BasicBlock *successor = branch->getSuccessor(0);
+
+ // Fail if we found an infinite loop.
+ if (!visited.insert(successor)) return 0;
+
+ // If the successor isn't dominated by exnBlock:
+ if (!successor->getSinglePredecessor()) {
+ // We don't want to have to deal with threading the exception
+ // through multiple levels of phi, so give up if we've already
+ // followed a non-dominating edge.
+ if (!dominates) return 0;
+
+ // Otherwise, remember this as a non-dominating edge.
+ dominates = false;
+ nonDominated = successor;
+ lastDominated = exnBlock;
+ }
+
+ exnBlock = successor;
+
+ // Can we stop here?
+ exn = findExceptionInBlock(exnBlock);
+ } while (!exn);
+
+ // Look for a selector call for the exception we found.
+ EHSelectorInst *selector = findSelectorForException(exn);
+ if (!selector) return 0;
+
+ // The easy case is when the landing pad still dominates the
+ // exception call, in which case we can just move both calls back to
+ // the landing pad.
+ if (dominates) {
+ selector->moveBefore(lpad->getFirstNonPHI());
+ exn->moveBefore(selector);
+ return selector;
+ }
+
+ // Otherwise, we have to split at the first non-dominating block.
+ // The CFG looks basically like this:
+ // lpad:
+ // phis_0
+ // insnsAndBranches_1
+ // br label %nonDominated
+ // nonDominated:
+ // phis_2
+ // insns_3
+ // %exn = call i8* @llvm.eh.exception()
+ // insnsAndBranches_4
+ // %selector = call @llvm.eh.selector(i8* %exn, ...
+ // We need to turn this into:
+ // lpad:
+ // phis_0
+ // %exn0 = call i8* @llvm.eh.exception()
+ // %selector0 = call @llvm.eh.selector(i8* %exn0, ...
+ // insnsAndBranches_1
+ // br label %split // from lastDominated
+ // nonDominated:
+ // phis_2 (without edge from lastDominated)
+ // %exn1 = call i8* @llvm.eh.exception()
+ // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ...
+ // br label %split
+ // split:
+ // phis_2 (edge from lastDominated, edge from split)
+ // %exn = phi ...
+ // %selector = phi ...
+ // insns_3
+ // insnsAndBranches_4
+
+ assert(nonDominated);
+ assert(lastDominated);
+
+ // First, make clones of the intrinsics to go in lpad.
+ EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
+ EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
+ lpadSelector->setArgOperand(0, lpadExn);
+ lpadSelector->insertBefore(lpad->getFirstNonPHI());
+ lpadExn->insertBefore(lpadSelector);
+
+ // Split the non-dominated block.
+ BasicBlock *split =
+ nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
+ nonDominated->getName() + ".lpad-fix");
+
+ // Redirect the last dominated branch there.
+ cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);
+
+ // Move the existing intrinsics to the end of the old block.
+ selector->moveBefore(&nonDominated->back());
+ exn->moveBefore(selector);
+
+ Instruction *splitIP = &split->front();
+
+ // For all the phis in nonDominated, make a new phi in split to join
+ // that phi with the edge from lastDominated.
+ for (BasicBlock::iterator
+ i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
+ PHINode *phi = dyn_cast<PHINode>(i);
+ if (!phi) break;
+
+ PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
+ splitIP);
+ phi->replaceAllUsesWith(splitPhi);
+ splitPhi->addIncoming(phi, nonDominated);
+ splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
+ lastDominated);
+ }
+
+ // Make new phis for the exception and selector.
+ PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
+ exn->replaceAllUsesWith(exnPhi);
+ selector->setArgOperand(0, exn); // except for this use
+ exnPhi->addIncoming(exn, nonDominated);
+ exnPhi->addIncoming(lpadExn, lastDominated);
+
+ PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
+ selector->replaceAllUsesWith(selectorPhi);
+ selectorPhi->addIncoming(selector, nonDominated);
+ selectorPhi->addIncoming(lpadSelector, lastDominated);
+
+ return lpadSelector;
+}
+
+namespace {
+ /// A class for recording information about inlining through an invoke.
+ class InvokeInliningInfo {
+ BasicBlock *OuterUnwindDest;
+ EHSelectorInst *OuterSelector;
+ BasicBlock *InnerUnwindDest;
+ PHINode *InnerExceptionPHI;
+ PHINode *InnerSelectorPHI;
+ SmallVector<Value*, 8> UnwindDestPHIValues;
+
+ public:
+ InvokeInliningInfo(InvokeInst *II) :
+ OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
+ InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {
+
+ // If there are PHI nodes in the unwind destination block, we
+ // need to keep track of which values came into them from the
+ // invoke before removing the edge from this block.
+ llvm::BasicBlock *invokeBB = II->getParent();
+ for (BasicBlock::iterator I = OuterUnwindDest->begin();
+ isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *phi = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
+ }
+ }
+
+ /// The outer unwind destination is the target of unwind edges
+ /// introduced for calls within the inlined function.
+ BasicBlock *getOuterUnwindDest() const {
+ return OuterUnwindDest;
+ }
+
+ EHSelectorInst *getOuterSelector() {
+ if (!OuterSelector)
+ OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
+ return OuterSelector;
+ }
+
+ BasicBlock *getInnerUnwindDest();
+
+ bool forwardEHResume(CallInst *call, BasicBlock *src);
+
+ /// Add incoming-PHI values to the unwind destination block for
+ /// the given basic block, using the values for the original
+ /// invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterUnwindDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+}
+
+/// Get or create a target for the branch out of rewritten calls to
+/// llvm.eh.resume.
+BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
+ if (InnerUnwindDest) return InnerUnwindDest;
+
+ // Find the llvm.eh.exception and llvm.eh.selector calls in the outer
+ // landing pad and hoist them so that they immediately follow the phis.
+ EHSelectorInst *selector = getOuterSelector();
+ if (!selector) return 0;
+
+ // The call to llvm.eh.exception *must* be in the landing pad.
+ Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
+ assert(exn->getParent() == OuterUnwindDest);
+
+ // TODO: recognize when we've already done this, so that we don't
+ // get a linear number of these when inlining calls into lots of
+ // invokes with the same landing pad.
+
+ // Do the hoisting.
+ Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
+ assert(splitPoint != selector && "selector-on-exception dominance broken!");
+ if (splitPoint == exn) {
+ selector->removeFromParent();
+ selector->insertAfter(exn);
+ splitPoint = selector->getNextNode();
+ } else {
+ exn->moveBefore(splitPoint);
+ selector->moveBefore(splitPoint);
+ }
+
+ // Split the landing pad.
+ InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
+ OuterUnwindDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned phiCapacity = 2;
+
+ // Create corresponding new phis for all the phis in the outer landing pad.
+ BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
+ BasicBlock::iterator I = OuterUnwindDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *outerPhi = cast<PHINode>(I);
+ PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
+ outerPhi->getName() + ".lpad-body",
+ insertPoint);
+ outerPhi->replaceAllUsesWith(innerPhi);
+ innerPhi->addIncoming(outerPhi, OuterUnwindDest);
+ }
+
+ // Create a phi for the exception value...
+ InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
+ "exn.lpad-body", insertPoint);
+ exn->replaceAllUsesWith(InnerExceptionPHI);
+ selector->setArgOperand(0, exn); // restore this use
+ InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);
+
+ // ...and the selector.
+ InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
+ "selector.lpad-body", insertPoint);
+ selector->replaceAllUsesWith(InnerSelectorPHI);
+ InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);
+
+ // All done.
+ return InnerUnwindDest;
+}
+
+/// [LIBUNWIND] Try to forward the given call, which logically occurs
+/// at the end of the given block, as a branch to the inner unwind
+/// block. Returns true if the call was forwarded.
+bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
+ // First, check whether this is a call to the intrinsic.
+ Function *fn = dyn_cast<Function>(call->getCalledValue());
+ if (!fn || fn->getName() != "llvm.eh.resume")
+ return false;
+
+ // At this point, we need to return true on all paths, because
+ // otherwise we'll construct an invoke of the intrinsic, which is
+ // not well-formed.
+
+ // Try to find or make an inner unwind dest, which will fail if we
+ // can't find a selector call for the outer unwind dest.
+ BasicBlock *dest = getInnerUnwindDest();
+ bool hasSelector = (dest != 0);
+
+ // If we failed, just use the outer unwind dest, dropping the
+ // exception and selector on the floor.
+ if (!hasSelector)
+ dest = OuterUnwindDest;
+
+ // Make a branch.
+ BranchInst::Create(dest, src);
+
+ // Update the phis in the destination. They were inserted in an
+ // order which makes this work.
+ addIncomingPHIValuesForInto(src, dest);
+
+ if (hasSelector) {
+ InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
+ InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
+ }
+
+ return true;
+}
+
+/// [LIBUNWIND] Check whether this selector is "only cleanups":
+/// call i32 @llvm.eh.selector(blah, blah, i32 0)
+static bool isCleanupOnlySelector(EHSelectorInst *selector) {
+ if (selector->getNumArgOperands() != 3) return false;
+ ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
+ return (val && val->isZero());
+}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to the invoke's unwind destination
+/// and fills in the PHI nodes in that block with the values recorded in Invoke.
+///
+/// Returns true to indicate that the next block should be skipped.
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ InvokeInliningInfo &Invoke) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI == 0) continue;
+
+ // LIBUNWIND: merge selector instructions.
+ if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
+ EHSelectorInst *Outer = Invoke.getOuterSelector();
+ if (!Outer) continue;
+
+ bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
+ bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);
+
+ // If both selectors contain only cleanups, we don't need to do
+ // anything. TODO: this is really just a very specific instance
+ // of a much more general optimization.
+ if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;
+
+ // Otherwise, we just append the outer selector to the inner selector.
+ SmallVector<Value*, 16> NewSelector;
+ for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
+ NewSelector.push_back(Inner->getArgOperand(i));
+ for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
+ NewSelector.push_back(Outer->getArgOperand(i));
+
+ CallInst *NewInner =
+ IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
+ // No need to copy attributes, calling convention, etc.
+ NewInner->takeName(Inner);
+ Inner->replaceAllUsesWith(NewInner);
+ Inner->eraseFromParent();
+ continue;
+ }
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction.
+ // First, split the basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
+ // directly to the new landing pad.
+ if (Invoke.forwardEHResume(CI, BB)) {
+ // TODO: 'Split' is now unreachable; clean it up.
+
+ // We want to leave the original call intact so that the call
+ // graph and other structures won't get misled. We also have to
+ // avoid processing the next block, or we'll iterate here forever.
+ return true;
+ }
+
+ // Otherwise, create the new invoke instruction.
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterUnwindDest(),
+ InvokeArgs, CI->getName(), BB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ Split->getInstList().pop_front(); // Delete the original call
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ return false;
+ }
+
+ return false;
+}
+
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function; scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite. If the code doesn't have calls or unwinds, we know there is
+ // nothing to rewrite.
+ if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+ return;
+ }
+
+ InvokeInliningInfo Invoke(II);
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
+ // Honor a request to skip the next block. We don't need to
+ // consider UnwindInsts in this case either.
+ ++BB;
+ continue;
+ }
+
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // An UnwindInst requires special handling when it gets inlined into an
+ // invoke site. Once this happens, we know that the unwind would cause
+ // a control transfer to the invoke exception destination, so we can
+ // transform it into a direct branch to the exception destination.
+ BranchInst::Create(InvokeDest, UI);
+
+ // Delete the unwind instruction!
+ UI->eraseFromParent();
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(BB);
+ }
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument requires an alignment greater than what is known
+    // about the passed-in pointer, then we either have to raise the pointer's
+    // known alignment or give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
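+// A hypothetical illustration of the rewrite above (names invented): for an
+// argument passed as
+//   %struct.S* byval align 4 %p
+// the inlined body ends up reading from a fresh copy in the caller,
+//   %p.copy = alloca %struct.S, align 4              ; caller's entry block
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size,
+//                                        i32 1, i1 false)
+// where %dst/%src are i8* casts of %p.copy/%p and %size is sizeof(%struct.S).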
+
+// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
+// intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
+ ++UI) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// hasLifetimeMarkers - Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+ if (AI->getType() == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
+ ++I) {
+ if (I->getType() != Int8PtrTy) continue;
+ if (I->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(*I))
+ return true;
+ }
+ return false;
+}
+
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
+/// update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
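+// A hypothetical illustration of the recursion above: if an instruction in
+// the callee already carries a location L1 whose inlinedAt is L2, and the
+// call being inlined has location L3, the result is L1 with inlinedAt set to
+// (L2 with inlinedAt L3), so the whole inline stack is preserved.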
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// encode the location at which these instructions were inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown())
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ }
+ }
+}
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller. This returns false if it is not possible to inline this
+// call. The program is still in a well defined state if this occurs though.
+//
+// Note that this only does one level of inlining. For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream. Similarly this will inline a recursive
+// function by one level.
+//
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+ Instruction *TheCall = CS.getInstruction();
+ LLVMContext &Context = TheCall->getContext();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ //
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+      // When byval arguments are actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ //
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+  // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ if (!IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(FirstNewBlock->begin());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ builder.CreateLifetimeStart(AI);
+ for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
+ IRBuilder<> builder(Returns[ri]);
+ builder.CreateLifetimeEnd(AI);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ }
+
+ // Count the number of StackRestore calls we insert.
+ unsigned NumStackRestores = Returns.size();
+
+ // If we are inlining an invoke instruction, insert restores before each
+ // unwind. These unwinds will be rewritten into branches later.
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
+ ++NumStackRestores;
+ }
+ }
+ }
+
+  // If we are inlining a tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
+ // instructions are unreachable.
+ if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isa<UnwindInst>(Term)) {
+ new UnreachableInst(Context, Term);
+ BB->getInstList().erase(Term);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any inlined 'unwind' instructions into branches to the invoke exception
+ // destination, and call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+    // updated due to new incoming edges, and makes the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ const Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = 0;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+    // Delete the return instruction and the now-empty ReturnBB.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI)
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+
+ return true;
+}
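+// A minimal usage sketch (illustrative only): it assumes an existing
+// CallInst *CI whose callee is a defined, non-vararg function, plus optional
+// CallGraph *CG and TargetData *TD pointers, either of which may be null:
+//
+//   InlineFunctionInfo IFI(CG, TD);
+//   if (InlineFunction(CallSite(CI), IFI)) {
+//     // CI has been erased. IFI.StaticAllocas and IFI.InlinedCalls describe
+//     // the allocas and call sites that were introduced into the caller.
+//   }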
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 000000000000..45c15de9437f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && !I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+}
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
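+// Usage sketch (illustrative): since the pass is registered as "instnamer",
+// it can be run from opt, e.g.
+//   opt -instnamer input.ll -S -o named.ll
+// or added to a PassManager via createInstructionNamerPass().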
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 000000000000..b654111eba74
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,279 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the code on the left into the code on the right:
+//
+//   for (...)               for (...)
+//     if (c)                  if (c)
+//       X1 = ...                X1 = ...
+//     else                    else
+//       X2 = ...                X2 = ...
+//     X3 = phi(X1, X2)        X3 = phi(X1, X2)
+//     ... = X3 + 4            X4 = phi(X3)
+//                             ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+
+ /// inLoop - returns true if the given block is within the current loop
+ bool inLoop(BasicBlock *B) const {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
+/// runOnLoop - Process a single loop, inserting LCSSA PHI nodes as needed.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ // Get the set of exiting blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false;
+
+ for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+ BBI != E; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->use_back()->getParent() == BB &&
+ !isa<PHINode>(I->use_back())))
+ continue;
+
+ MadeChange |= ProcessInstruction(I, ExitBlocks);
+ }
+ }
+
+ assert(L->isLCSSAForm(*DT));
+ PredCache.clear();
+
+ return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] == BB)
+ return true;
+ return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
+
+ BasicBlock *InstBB = Inst->getParent();
+
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !inLoop(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not available
+ // along their unwind edge. The code below tests to see whether DomBB dominates
+ // the value, so adjust DomBB to the normal destination block, which is
+ // effectively where the value is first usable.
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT->getNode(DomBB);
+
+ SmallVector<PHINode*, 16> AddedPHIs;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
+
+ PHINode *PN = PHINode::Create(Inst->getType(),
+ PredCache.GetNumPreds(ExitBB),
+ Inst->getName()+".lcssa",
+ ExitBB->begin());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
+ PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ }
+
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+ if (AddedPHIs[i]->use_empty())
+ AddedPHIs[i]->eraseFromParent();
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..0f6d9ae99d66
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,875 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
+ TerminatorInst *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+      // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *DefaultDest = TheOnlyDest;
+ assert(TheOnlyDest == SI->getDefaultDest() &&
+ "Default destination is not successor #0?");
+
+ // Figure out which case it goes to.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ if (SI->getSuccessorValue(i) == CI) {
+ TheOnlyDest = SI->getSuccessor(i);
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (SI->getSuccessor(i) == DefaultDest) {
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e; // Don't skip an entry...
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+      // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+
+ if (SI->getNumSuccessors() == 2) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ SI->getSuccessorValue(1), "cond");
+
+ // Insert the new branch.
+ Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0));
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
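+// A hypothetical before/after for the conditional-branch case above:
+//   br i1 true, label %then, label %else
+// becomes
+//   br label %then
+// after %else has been told it lost this predecessor; the switch and
+// indirectbr cases are folded analogously when their operand is constant.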
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
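+// A hypothetical example of the recursion above: given
+//   %m = mul i32 %x, %y
+//   %a = add i32 %m, 1          ; %a has no uses
+// deleting %a leaves %m without uses, so %m is queued and deleted on the next
+// iteration of the worklist loop.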
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->use_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I)) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ return false;
+}
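+// A hypothetical example of the cycle case above: a loop counter whose only
+// user is its own increment,
+//   %i      = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+//   %i.next = add i32 %i, 1
+// forms a single-use def-use cycle with no other users; the second visit to
+// %i breaks the cycle with undef and both instructions are deleted.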
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+ bool MadeChange = false;
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ MadeChange = true;
+ if (BIHandle != BI)
+ BI = BB->begin();
+ continue;
+ }
+
+ if (Inst->isTerminator())
+ break;
+
+ WeakVH BIHandle(BI);
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ if (BIHandle != BI)
+ BI = BB->begin();
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = SimplifyInstruction(PN, TD);
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "SimplifyInstruction broken!");
+
+ Value *OldPhiIt = PhiIt;
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace self referencing PHI with undef, it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (BBPreds.count(P))
+ CommonPreds.insert(P);
+ }
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential side-effect free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
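
As a schematic illustration of the transformation performed above (not taken from the patch itself):

// Before:  P1, P2 --> BB --> Succ     (BB holds only PHIs and a 'br Succ')
// After:   P1, P2 --> Succ            (BB is deleted; the PHI nodes in Succ
//                                       now take their values directly from
//                                       P1 and P2)
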
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+ Hash >>= 1;
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
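
The hashing above mixes each operand and incoming block into the hash with an XOR followed by a 7-bit rotate. A minimal standalone sketch of that mixing step (the helper name is invented for illustration):

#include <climits>
#include <stdint.h>

// XOR the value into the running hash, then rotate left by 7 bits, exactly as
// done per-operand in EliminateDuplicatePHINodes above.
static uintptr_t mixPHIHash(uintptr_t Hash, uintptr_t V) {
  Hash ^= V;
  return (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
}
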
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign) {
+ V = V->stripPointerCasts();
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, so increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const TargetData *TD) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
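
For a rough sense of the arithmetic above (illustrative numbers only, ignoring the Value::MaximumAlignment clamp): four known trailing zero bits translate into a 16-byte alignment.

// Sketch: the alignment inferred from known trailing zero bits.
static unsigned exampleInferredAlignment() {
  unsigned TrailZ = 4;        // low 4 bits proven zero by ComputeMaskedBits
  return 1u << TrailZ;        // 16; a PrefAlign of 32 would then prompt
                              // enforceKnownAlignment to try to raise it
}
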
+
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// Inserts an llvm.dbg.value intrinsic before a store to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal = NULL;
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
+ else
+ DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// Inserts an llvm.dbg.value intrinsic before a load of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ LoadInst *LI, DIBuilder &Builder) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ Instruction *DbgVal =
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVar, LI);
+
+ // Propagate any debug metadata from the load onto the dbg.value.
+ DebugLoc LIDL = LI->getDebugLoc();
+ if (!LIDL.isUnknown())
+ DbgVal->setDebugLoc(LIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+ DIBuilder DIB(*F.getParent());
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ Dbgs.push_back(DDI);
+ }
+ if (Dbgs.empty())
+ return false;
+
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ E = Dbgs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ bool RemoveDDI = true;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ else
+ RemoveDDI = false;
+ if (RemoveDDI)
+ DDI->eraseFromParent();
+ }
+ }
+ return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
+ if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ return DDI;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 000000000000..e79fb5ac21b4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,753 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
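
A minimal sketch of the guarantees described above, phrased as the Loop queries a client pass would make. It mirrors the Loop::isLoopSimplifyForm() check mentioned in verifyAnalysis() below; as noted, any of the three conditions can legitimately fail when indirectbr is involved, so clients should check rather than assert.

#include "llvm/Analysis/LoopInfo.h"

// Sketch only: what "simplified form" means to a client of this pass.
static bool isInSimplifiedForm(const llvm::Loop *L) {
  return L->getLoopPreheader() &&   // single, non-critical entry edge
         L->getLoopLatch()     &&   // exactly one backedge
         L->hasDedicatedExits();    // exit blocks have only in-loop preds
}
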
+
+#define DEBUG_TYPE "loop-simplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ struct LoopSimplify : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
+
+ private:
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ BasicBlock *InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+ L = l;
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ Changed |= ProcessLoop(L, LPM);
+
+ return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (SeparateNestedLoop(L, LPM)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't eliminate this tail recursion, so use a goto instead.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
+ ".preheader", this);
+
+ NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+ << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+ return NewBB;
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+ SmallVector<BasicBlock*, 8> LoopBlocks;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ BasicBlock *P = *I;
+ if (L->contains(P)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ LoopBlocks.push_back(P);
+ }
+ }
+
+ assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+ BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewBB->getName() << "\n");
+ return NewBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ std::vector<BasicBlock *> WorkList;
+ WorkList.push_back(InputBB);
+ do {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+ // If BB has not already been processed and is not the stop block, add
+ // its predecessors to the work list.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ WorkList.push_back(WBB);
+ }
+ } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AliasAnalysis *AA, LoopInfo *LI) {
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return 0;
+}
+
+// PlaceSplitBlockCarefully - If the new block isn't already well placed, move
+// it right after one of the 'outside' blocks. This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
+ OuterLoopPreds.size(),
+ ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i;
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+///
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to jump to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that an indirectbr is nearby whenever there is trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 000000000000..6772511b5d5a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,395 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// It works best when loops have been canonicalized by the -indvars pass,
+// allowing it to determine the trip counts of loops easily.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+static inline void RemapInstruction(Instruction *I,
+ ValueToValueMapTy &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
+ if (It != VMap.end())
+ I->setOperand(op, It->second);
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ std::string OldName = BB->getName();
+
+ // Erase basic block from the function...
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+bool llvm::UnrollLoop(Loop *L, unsigned Count,
+ LoopInfo *LI, LPPassManager *LPM) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ // Find trip multiple if count is not available
+ unsigned TripMultiple = 1;
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
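  // Worked example (illustrative numbers): with TripCount == 10 and Count == 4,
  // BreakoutTrip becomes 10 % 4 == 2 and TripMultiple is cleared to 0. When the
  // latch branches are wired up below, only the latch copy whose next
  // destination is header copy 2 keeps its conditional branch; the other latch
  // copies become unconditional, because with exactly 10 trips the exit can
  // only be taken from that copy.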
+
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ } else {
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ OrigPHINode.push_back(PN);
+ if (Instruction *I =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
+ if (L->contains(I))
+ LastValueMap[I] = I;
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
+ E = LoopBlocks.end(); BB != E; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ // Add phi entries for newly created values to all exit blocks except
+ // the successor of the latch block. The successor of the exit block will
+ // be updated specially after unrolling all the way.
+ if (*BB != LatchBlock)
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+ ++SI)
+ if (!L->contains(*SI))
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
+
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock) {
+ Latches.push_back(New);
+
+ // Also, clear out the new latch's back edge so that it doesn't look
+ // like a new loop, so that it's amenable to being merged with adjacent
+ // blocks later on.
+ TerminatorInst *Term = New->getTerminator();
+ assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
+ assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
+ Term->setSuccessor(!ContinueOnTrue, NULL);
+ }
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ ::RemapInstruction(I, LastValueMap);
+ }
+
+ // The latch block exits the loop. If there are any PHI nodes in the
+ // successor blocks, update them to use the appropriate values computed as the
+ // last iteration of the loop.
+ if (Count != 1) {
+ BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
+ for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
+ SI != SE; ++SI) {
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
+ }
+ }
+ }
+
+ // Now, if we're doing complete unrolling, loop over the PHI nodes in the
+ // original block, setting them to their incoming values.
+ if (CompletelyUnroll) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ }
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ return true;
+}
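
A hypothetical caller, to show the expected calling convention (the wrapper name and the unroll factor are invented for illustration; the header path is the one included at the top of this file):

#include "llvm/Transforms/Utils/UnrollLoop.h"

// Sketch: ask for a 4x unroll, keeping LoopInfo consistent; the loop is
// removed from the LoopPassManager if it ends up completely unrolled.
static bool tryUnrollByFour(llvm::Loop *L, llvm::LoopInfo *LI,
                            llvm::LPPassManager &LPM) {
  return llvm::UnrollLoop(L, /*Count=*/4, LI, &LPM);
}
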
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..c1213fac7bc7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+ class LowerExpectIntrinsic : public FunctionPass {
+
+ bool HandleSwitchExpect(SwitchInst *SI);
+
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+
+ unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ std::vector<Value *> Vec;
+ unsigned n = SI->getNumCases();
+ Vec.resize(n + 1); // +1 for MDString
+
+ Vec[0] = MDString::get(Context, "branch_weights");
+ for (unsigned i = 0; i < n; ++i) {
+ Vec[i + 1] = ConstantInt::get(Int32Ty,
+ i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+ }
+
+ MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+ SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ SI->setCondition(ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+ if (BI->isUnconditional())
+ return false;
+
+ // Handle non-optimized IR code like:
+ // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %tobool = icmp ne i64 %expval, 0
+ // br i1 %tobool, label %if.then, label %if.end
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ bool Likely = ExpectedValue->isOne();
+
+ // If the expected value is 1, the true successor (branch 0) is more likely;
+ // otherwise the false successor (branch 1) is more likely.
+ Value *Ops[] = {
+ MDString::get(Context, "branch_weights"),
+ ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+ ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+ };
+
+ MDNode *WeightsNode = MDNode::get(Context, Ops);
+ BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ CmpI->setOperand(0, ArgValue);
+ return true;
+}
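
The net effect on the IR pattern shown in the comment above, with the default weights defined earlier in this file (a schematic illustration; the exact profile-metadata syntax varies between LLVM versions):

// The branch ends up testing %conv1 directly and carries, schematically,
//   !prof !{!"branch_weights", i32 64, i32 4}
// i.e. when the expected value is 1 the true successor is weighted 64 and the
// false successor 4 (LikelyBranchWeight : UnlikelyBranchWeight).
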
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I++;
+
+ // Create "block_weights" metadata.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (HandleIfExpect(BI))
+ IfHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (HandleSwitchExpect(SI))
+ IfHandled++;
+ }
+
+ // remove llvm.expect intrinsics.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ) {
+ CallInst *CI = dyn_cast<CallInst>(BI++);
+ if (!CI)
+ continue;
+
+ Function *Fn = CI->getCalledFunction();
+ if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ Value *Exp = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(Exp);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+ return new LowerExpectIntrinsic();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 000000000000..f77d19de900d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,603 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support allows any program that does not actually
+// "throw an exception" to execute correctly, by turning 'invoke' instructions
+// into calls and by turning 'unwind' instructions into calls to abort(). If
+// the program does dynamically execute an 'unwind' instruction, it simply
+// aborts.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs, the CFG is not entirely accurate (exceptional
+// control flow edges are no longer correct), so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general-purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
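+// As a rough illustration of the 'cheap' model only (not literal pass output),
+// an invoke/unwind pair such as
+//
+//   invoke void @foo() to label %cont unwind label %lpad
+//   ...
+//   unwind
+//
+// is rewritten into plain calls and branches:
+//
+//   call void @foo()
+//   br label %cont
+//   ...
+//   call void @abort()
+//   ret void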
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+ class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Constant *AbortFn;
+
+ // Used for expensive EH support.
+ StructType *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
+
+ // We peek into TLI to grab the target's jmp_buf size and alignment.
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
+
+ const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'abort' function for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
+ return true;
+}
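+
+// Illustrative sketch of the module-level entities created by doInitialization
+// for the expensive model (the jump buffer size of 200 is the fallback used
+// when no TargetLowering is available):
+//
+//   %llvm.sjljeh.jmpbufty = type { [200 x i8*], %llvm.sjljeh.jmpbufty* }
+//   @llvm.sjljeh.jblist = linkonce global %llvm.sjljeh.jmpbufty* null
+//   declare void @abort()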
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(BB);
+
+ // Remove the invoke instruction now.
+ BB->getInstList().erase(II);
+
+ ++NumInvokes; Changed = true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "", UI)->setTailCall();
+
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
+ ReturnInst::Create(F.getContext(),
+ F.getReturnType()->isVoidTy() ?
+ 0 : Constant::getNullValue(F.getReturnType()), UI);
+
+ // Remove the unwind instruction now.
+ BB->getInstList().erase(UI);
+
+ ++NumUnwinds; Changed = true;
+ }
+ return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
+
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstNonPHI());
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
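+
+// Rough shape of a rewritten invoke in the expensive model (illustrative only;
+// %invokenum, %stackptr and %ssret follow the names used by this pass, @foo is
+// a placeholder callee):
+//
+//   store volatile i32 <InvokeNo>, i32* %invokenum
+//   %ssret = call i8* @llvm.stacksave()
+//   store volatile i8* %ssret, i8** %stackptr
+//   call void @foo()                    ; was: invoke void @foo() ...
+//   br label %normal.dest
+//
+// with the matching stack restore and the zeroing of %invokenum emitted at the
+// unwind and normal destinations respectively.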
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invokes.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the argument.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the argument.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Scan all of the uses and see if the live range is live across an unwind
+ // edge. If we find a use live across an invoke edge, create an alloca
+ // and spill the value.
+ std::set<InvokeInst*> InvokesWithStoreInserted;
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
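+
+// A value spilled by DemoteRegToStack above ends up, roughly (illustrative
+// only; the ".reg2mem"/".reload" names are approximate), as a stack slot that
+// is stored once at the definition and reloaded before each use; the reloads
+// are volatile because VolatileLoads is passed as true:
+//
+//   entry:
+//     %t.reg2mem = alloca i32
+//   ...
+//     %t = add i32 %a, %b
+//     store i32 %t, i32* %t.reg2mem
+//   ...
+//     %t.reload = load i32* %t.reg2mem    ; volatile reload at each use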
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+// code path on zero, and to a designated 'catch' block on nonzero.
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+ // Create three new blocks, the block to load the jmpbuf ptr and compare
+ // against null, the block to do the longjmp, and the error block for the
+ // case where it is null. Add them at the end of the function because they
+ // are not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull");
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
+ UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(F.getContext(), TermBlock);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+
+ // Replace all unwinds with a branch to the unwind handler.
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindHandler, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
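+
+// For orientation only, the entry block of a function containing invokes ends
+// up looking roughly like this after insertExpensiveEHSupport (names follow
+// the allocas and blocks created above; %buf stands for the bitcast jmpbuf
+// pointer, details elided):
+//
+//   entry:
+//     %jblink = alloca %llvm.sjljeh.jmpbufty
+//     ...                           ; save old @llvm.sjljeh.jblist, install %jblink
+//     %sjret = call i32 @llvm.setjmp(i8* %buf)
+//     %notunwind = icmp eq i32 %sjret, 0
+//     br i1 %notunwind, label %setjmp.cont, label %setjmp.catch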
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (useExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 000000000000..ed733d393a11
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,323 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
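+// For illustration only (block and value names are approximate), a two-case
+// switch such as
+//
+//   switch i32 %x, label %default [ i32 0, label %a
+//                                   i32 1, label %b ]
+//
+// is replaced by a small binary tree of compares and conditional branches:
+//
+//   NodeBlock:
+//     %Pivot = icmp slt i32 %x, 1
+//     br i1 %Pivot, label %LeafBlock, label %LeafBlock1
+//   LeafBlock:                                  ; handles case 0
+//     %SwitchLeaf = icmp eq i32 %x, 0
+//     br i1 %SwitchLeaf, label %a, label %NewDefault
+//   LeafBlock1:                                 ; handles case 1
+//     %SwitchLeaf1 = icmp eq i32 %x, 1
+//     br i1 %SwitchLeaf1, label %b, label %NewDefault
+//   NewDefault:
+//     br label %default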
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI);
+
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+}
+
+char LowerSwitch::ID = 0;
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI);
+ }
+ }
+
+ return Changed;
+}
+
+// operator<< - Used for debugging purposes.
+//
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ Value* Val, BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ unsigned Size = End - Begin;
+
+ if (Size == 1)
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange& Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+ << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+ OrigBlock, Default);
+ BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+ OrigBlock, Default);
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+ Val, Pivot.Low, "Pivot");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = NULL;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
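+// For example (illustrative), with cases that share a destination:
+//
+//   switch i32 %x, label %default [ i32 1, label %dest
+//                                   i32 2, label %dest
+//                                   i32 3, label %dest ]
+//
+// Clusterify merges the three adjacent cases into the single CaseRange
+// [1, 3], which newLeafBlock later lowers to one unsigned range check
+// ("%x - 1 <=u 2") instead of three equality tests.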
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+ Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+ SI->getSuccessorValue(i),
+ SI->getSuccessor(i)));
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters.
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getOperand(0); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (SI->getNumOperands() == 2) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements goes to this block and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ CurBlock->getInstList().erase(SI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 000000000000..f4ca81af6d87
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,90 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
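+// A minimal sketch of the effect (the heavy lifting is done by PromoteMemToReg;
+// see PromoteMemoryToRegister.cpp): a promotable alloca like
+//
+//   %x = alloca i32
+//   store i32 1, i32* %x
+//   %v = load i32* %x
+//
+// disappears entirely; every use of %v is rewritten to use the stored value
+// (here the constant 1) directly.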
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ struct PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+ };
+} // end of anonymous namespace
+
+char PromotePass::ID = 0;
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+bool PromotePass::runOnFunction(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// createPromoteMemoryToRegisterPass - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 000000000000..e5a00f4e9774
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1134 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
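+// Illustrative sketch (not literal pass output): an alloca stored on two paths
+// and loaded at their join point
+//
+//   entry:
+//     %x = alloca i32
+//     br i1 %c, label %t, label %f
+//   t:
+//     store i32 1, i32* %x
+//     br label %join
+//   f:
+//     store i32 2, i32* %x
+//     br label %join
+//   join:
+//     %v = load i32* %x
+//
+// is promoted by placing a PHI node at the join and deleting the alloca:
+//
+//   join:
+//     %v = phi i32 [ 1, %t ], [ 2, %f ]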
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+ typedef std::pair<BasicBlock*, unsigned> EltTy;
+ static inline EltTy getEmptyKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+ }
+ static inline EltTy getTombstoneKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+ }
+ static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ }
+ static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+ return LHS == RHS;
+ }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
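+
+// For example (illustrative; @takes_ptr is a made-up external function), the
+// following alloca is not promotable because its address escapes through a
+// call, which is neither a load nor a store:
+//
+//   %p = alloca i32
+//   call void @takes_ptr(i32* %p)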
+
+namespace {
+ struct AllocaInfo;
+
+ // Data package used by RenamePass()
+ class RenamePassData {
+ public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P,
+ const ValVector &V) : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+ };
+
+ /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+ /// load/store instructions in the block that directly load or store an alloca.
+ ///
+ /// This functionality is important because it avoids scanning large basic
+ /// blocks multiple times when promoting many allocas in the same block.
+ class LargeBlockInfo {
+ /// InstNumbers - For each instruction that we track, keep the index of the
+ /// instruction. The index starts out as the number of the instruction from
+ /// the start of the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+ public:
+
+ /// isInterestingInstruction - This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// getInstructionIndex - Get or calculate the index of the specified
+ /// instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end()) return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DIBuilder *DIB;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding.
+ ///
+ DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
+ /// intrinsic that describes it, if any, so that we can convert it to a
+ /// dbg.value intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+ /// non-deterministic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIB(0), AST(ast) {}
+ ~PromoteMem2Reg() {
+ delete DIB;
+ }
+
+ void run();
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ ///
+ bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+ return DT.dominates(BB1, BB2);
+ }
+
+ private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB))+1;
+ return NP-1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
+
+ void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+ };
+
+ struct AllocaInfo {
+ SmallVector<BasicBlock*, 32> DefiningBlocks;
+ SmallVector<BasicBlock*, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
+ }
+
+ /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
+ /// ivars.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+ };
+
+ typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+ struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+ };
+} // end of anonymous namespace
+
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ if (AST) PointerAllocaValues.resize(Allocas.size());
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) &&
+ "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeLifetimeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ RewriteSingleStoreAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (Info.UsingBlocks.empty()) {
+ // Record debuginfo for the store and remove the declaration's debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
+ DDI->eraseFromParent();
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ PromoteSingleBlockAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the stores are all that is
+ // left.
+ if (Info.UsingBlocks.empty()) {
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ DDI->eraseFromParent();
+
+ ++NumLocalPromoted;
+ continue;
+ }
+ }
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be null values for all of
+ // the alloca's. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this null value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walk all basic blocks in the function, performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary.
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove the allocas' dbg.declare intrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges a single value and/or undefs, get that value.
+ if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI node for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
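+///
+/// Illustrative sketch (hypothetical CFG, not taken from a real test case):
+/// for entry -> loop -> exit with a store to the alloca in 'entry' and a load
+/// in 'exit', the worklist starts as {exit}; 'loop' is added because it is not
+/// a defining block, while 'entry' is skipped because it defines the value,
+/// giving LiveInBlocks = {exit, loop}.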
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB)) continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i, --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI) continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB))
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too. Add the preds to the worklist unless they are
+ // defining blocks.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm. Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
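+///
+/// Illustrative sketch (hypothetical diamond CFG entry -> {then, else} ->
+/// merge, with stores in 'then' and 'else' and a load after the merge): both
+/// defining blocks sit at dominator-tree level 1, and the only qualifying CFG
+/// edge out of their subtrees targets 'merge' (also level 1), so 'merge' is
+/// the single block queued for a PHI node.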
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end(); I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+ AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+namespace {
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+ const std::pair<unsigned, StoreInst*> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+
+}
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block. If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// the offending blocks are left in Info.UsingBlocks and the caller falls back
+/// to the general mem2reg machinery. This is necessary in cases where, due to
+/// control flow, the alloca is potentially undefined on some control flow
+/// paths. e.g. code like this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ // Walk the use list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // If there are no stores to the alloca, just replace any loads with undef.
+ if (StoresByIndex.empty()) {
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LBI.deleteValue(LI);
+ LI->eraseFromParent();
+ }
+ return;
+ }
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI) continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
+ StoreIndexSearchPredicate());
+
+ // If there is no store before this load, then we can't promote this load.
+ if (I == StoresByIndex.begin()) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+
+ // Otherwise, there was a store before this load, the load takes its value.
+ --I;
+ LI->replaceAllUsesWith(I->second->getOperand(0));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+// QueuePhiNode - Queue a phi-node to be added to a basic-block for a specific
+// Alloca. Returns true if there wasn't already a phi-node for that variable.
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN) return false;
+
+ // Create a PhiNode using the alloca's allocated type and add the phi-node to
+ // the BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+
+ if (AST && PN->getType()->isPointerTy())
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads and
+// stores to the allocas which we are promoting. IncomingVals indicates what
+// value each Alloca contains on exit from the predecessor block Pred.
+//
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = 0;
+ for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+ if (*I == BB)
+ ++NumEdges;
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0) break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src) continue;
+
+ DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end()) continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and record the stored value as the current value
+ // of the alloca.
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest) continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
+ if (!DIB)
+ DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ }
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I))
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration;
+}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. This function does
+/// not modify the CFG of the function at all. All allocas must be from the
+/// same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
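+/// A minimal usage sketch (hypothetical caller; the exact analysis plumbing is
+/// an assumption, the in-tree Mem2Reg pass does essentially this):
+///
+///   std::vector<AllocaInst*> Allocas;
+///   BasicBlock &Entry = F.getEntryBlock();
+///   for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
+///     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+///       if (isAllocaPromotable(AI))
+///         Allocas.push_back(AI);
+///   if (!Allocas.empty())
+///     PromoteMemToReg(Allocas, DT, 0 /*no AliasSetTracker*/);
+///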
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+ DominatorTree &DT, AliasSetTracker *AST) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty()) return;
+
+ PromoteMem2Reg(Allocas, DT, AST).run();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..b47a7ccd80ba
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,520 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = Name;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
+/// in ValueMapping for each predecessor block.
+static bool IsEquivalentPHI(PHINode *PHI,
+ DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ InsertedPHI->eraseFromParent();
+ return V;
+ }
+
+ // Set DebugLoc.
+ InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB));
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ U.set(V);
+}
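+
+// A minimal usage sketch of the SSAUpdater interface (hypothetical client
+// code; the value and block names are assumptions):
+//
+//   SmallVector<PHINode*, 8> NewPHIs;
+//   SSAUpdater SSA(&NewPHIs);
+//   SSA.Initialize(OldVal->getType(), OldVal->getName());
+//   SSA.AddAvailableValue(DefBB1, NewVal1);   // value live out of DefBB1
+//   SSA.AddAvailableValue(DefBB2, NewVal2);   // value live out of DefBB2
+//   while (!OldVal->use_empty())
+//     SSA.RewriteUse(OldVal->use_begin().getUse());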
+
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
+/// in the SSAUpdaterImpl template.
+namespace {
+ class PHIiter {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHIiter(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHIiter(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHIiter &operator++() { ++idx; return *this; }
+ bool operator==(const PHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const PHIiter& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+}
+
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ typedef PHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds vector.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ Value *SomeVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
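+
+// A minimal usage sketch (hypothetical subclass; the full set of virtual hooks
+// is declared in SSAUpdater.h and only isInstInList is overridden here):
+//
+//   struct AllocaPromoter : public LoadAndStorePromoter {
+//     AllocaInst *AI;
+//     AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+//                    AllocaInst *AI)
+//       : LoadAndStorePromoter(Insts, S, AI->getName()), AI(AI) {}
+//     virtual bool isInstInList(Instruction *I,
+//                               const SmallVectorImpl<Instruction*> &) const {
+//       if (LoadInst *LI = dyn_cast<LoadInst>(I))
+//         return LI->getOperand(0) == AI;
+//       return cast<StoreInst>(I)->getPointerOperand() == AI;
+//     }
+//   };
+//
+//   SmallVector<Instruction*, 16> Uses;   // the loads/stores of one alloca
+//   for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+//        UI != E; ++UI)
+//     Uses.push_back(cast<Instruction>(*UI));
+//   SSAUpdater SSA;
+//   AllocaPromoter(Uses, SSA, AI).run(Uses);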
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 000000000000..9d9c324b8468
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,2779 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+
+namespace {
+class SimplifyCFGOpt {
+ const TargetData *const TD;
+
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder);
+
+ bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+
+public:
+ explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block. The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
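+///
+/// Illustrative IR sketch (hypothetical block and value names):
+///
+///   Entry:  br i1 %cond, label %T, label %F
+///   T:      br label %BB
+///   F:      br label %BB
+///   BB:     %phi = phi i32 [ %a, %T ], [ %b, %F ]
+///
+/// Here the function returns %cond with IfTrue = T and IfFalse = F.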
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = cast<PHINode>(BB->begin());
+ assert(SomePHI->getNumIncomingValues() == 2 &&
+ "Function can only handle blocks with 2 predecessors!");
+ BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+ BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
+ return 0;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
+
+/// DominatesMergePoint - If we have a merge point of an "if condition" as
+/// accepted above, return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
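+///
+/// For example (an illustrative sketch of how a caller may use the result):
+/// when considering "if (c) x = a + b; else x = 0;", the "a + b" add lives in
+/// the 'if region'; it is cheap and non-trapping, so it is added to
+/// AggressiveInsts and its cost is charged against CostRemaining, allowing the
+/// caller to speculate it above the branch and form a select.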
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+ unsigned &CostRemaining) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB) return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
+
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (AggressiveInsts == 0) return false;
+
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts->count(I)) return true;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+
+ unsigned Cost = 0;
+
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load:
+ // We have to check to make sure there are no instructions before the
+ // load in its basic block, as we are going to hoist the load out to its
+ // predecessor.
+ if (PBB->getFirstNonPHIOrDbg() != I)
+ return false;
+ Cost = 1;
+ break;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
+ return false;
+ Cost = 1;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ Cost = 1;
+ break; // These are all cheap and non-trapping instructions.
+
+ case Instruction::Select:
+ Cost = 2;
+ break;
+ }
+
+ if (Cost > CostRemaining)
+ return false;
+
+ CostRemaining -= Cost;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ return true;
+}
+
+/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
+/// and PointerNullValue. Return NULL if value is not a constant int.
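+///
+/// For example (illustrative): with TargetData available, the constant
+/// "inttoptr (i64 42 to i8*)" yields a pointer-sized ConstantInt 42 and a
+/// "null" i8* yields a pointer-sized 0; without TargetData both return null.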
+static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ const IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>
+ (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return 0;
+}
+
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
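+///
+/// For example (illustrative): with isEQ=true, "x == 1 || x == 3" returns x
+/// and appends 1 and 3 to Vals; with isEQ=false, "x != 5 && x ugt 2" returns x
+/// and appends 5, 0, 1 and 2 (the last three from the inverted ugt range).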
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
+ Vals.push_back(C);
+ return I->getOperand(0);
+ }
+
+ // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // If this is an and/!= check, then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1 && x != 2.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+ // We don't handle wrapped sets yet.
+ Span.isWrappedSet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return I->getOperand(0);
+ }
+ return 0;
+ }
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS)
+ return LHS;
+ Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
+
+ // The RHS of the or/and can't be folded in; if we haven't used "Extra" yet,
+ // stash the RHS there and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
+ }
+
+ Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
+ }
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra;
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS);
+ Extra = OldExtra;
+ }
+
+ return 0;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction* Cond = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
+ }
+
+ TI->eraseFromParent();
+ if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - If the specified terminator checks whether a
+/// value is equal to a constant integer value, return the value being
+/// compared; otherwise return null.
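+///
+/// For example (illustrative): "switch i32 %x, ..." yields %x, as does
+/// "br i1 (icmp eq i32 %x, 10), label %A, label %B" when the icmp has a
+/// single use.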
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+ Value *CV = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <= 128)
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+ ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+ GetConstantInt(ICI->getOperand(1), TD))
+ CV = ICI->getOperand(0);
+
+ // Unwrap any lossless ptrtoint cast.
+ if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ CV = PTII->getOperand(0);
+ return CV;
+}
+
+/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
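+///
+/// A hypothetical sketch of both forms:
+///   switch i32 %x, label %def [ i32 1, label %a i32 2, label %b ]
+/// fills Cases with (1, %a) and (2, %b) and returns %def, while
+///   %c = icmp ne i32 %x, 7
+///   br i1 %c, label %t, label %f
+/// fills Cases with (7, %f) and returns %t.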
+BasicBlock *SimplifyCFGOpt::
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*,
+ BasicBlock*> > &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD),
+ BI->getSuccessor(ICI->getPredicate() ==
+ ICmpInst::ICMP_NE)));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of value/block pairs, remove any
+/// entries in the list whose block matches the specified block.
+static void EliminateBlockCases(BasicBlock *BB,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) {
+ for (unsigned i = 0, e = Cases.size(); i != e; ++i)
+ if (Cases[i].second == BB) {
+ Cases.erase(Cases.begin()+i);
+ --i; --e;
+ }
+}
+
+/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+/// well.
+static bool
+ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) {
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->size() == 0) return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].first;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].first)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].first == (*V2)[i2].first)
+ return true;
+ if ((*V1)[i1].first < (*V2)[i2].first)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading.
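+///
+/// A hypothetical example: if Pred ends in
+///   switch i32 %x, label %TIBlock [ i32 0, label %a ]
+/// and TI, the terminator of %TIBlock, is
+///   %c = icmp eq i32 %x, 0
+///   br i1 %c, label %t, label %f
+/// then %x cannot be 0 on the default edge, so the edge to %t is dead and TI
+/// can be replaced with an unconditional branch to %f.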
+bool SimplifyCFGOpt::
+SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different predicates.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // unconditional branch.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void) NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].first);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].first;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void) NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt**)P1;
+ const ConstantInt *RHS = *(const ConstantInt**)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return 1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return -1;
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
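+///
+/// As an illustrative sketch (hypothetical names), a predecessor ending in
+///   switch i32 %x, label %BB [ i32 1, label %a ]
+/// whose default edge reaches a block %BB ending in
+///   switch i32 %x, label %d [ i32 2, label %b ]
+/// can be folded into a single
+///   switch i32 %x, label %d [ i32 1, label %a i32 2, label %b ]
+/// in the predecessor, with PHI nodes in the affected successors updated.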
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second != BB)
+ PTIHandled.insert(PredCases[i].first);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].first) &&
+ BBCases[i].second != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ }
+
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == BB) {
+ PTIHandled.insert(PredCases[i].first);
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].first)) {
+ // If this is one we are capable of getting...
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ PredCases.push_back(std::make_pair(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
+ PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of the new switch, then we
+ // must have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
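+///
+/// An illustrative sketch (hypothetical names): if BB1 begins with
+/// "%a1 = add i32 %x, 1" and BB2 begins with "%a2 = add i32 %x, 1", the add is
+/// moved up into the branch block, all uses of %a2 are replaced with %a1, and
+/// hoisting continues instruction by instruction until the blocks diverge.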
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
+
+ // If we get here, we can hoist at least one instruction.
+ BasicBlock *BIParent = BI->getParent();
+
+ do {
+ // If we are hoisting the terminator instruction, don't move it (that would
+ // make a broken BB); instead clone it and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ I1 = BB1_Itr++;
+ I2 = BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
+
+ return true;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ IRBuilder<true, NoFolder> Builder(NT);
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If the PHI entries for BB1 and BB2 do not agree, we insert a select
+ // instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName()));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 and
+/// BB2, where BB2 is the only successor of BB1, hoist simple code (for now,
+/// restricted to a single side-effect-free instruction) from BB1 into the
+/// branch block so it can be speculatively executed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ Instruction *HInst = NULL;
+ Instruction *Term = BB1->getTerminator();
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
+
+ if (HInst)
+ return false;
+ HInst = I;
+ }
+ if (!HInst)
+ return false;
+
+ // Be conservative for now. FP select instructions can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ // If BB1 is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (BB1 != BI->getSuccessor(0)) {
+ assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+
+ // Turn
+ // BB:
+ //     %t1 = icmp
+ //     br i1 %t1, label %BB1, label %BB2
+ // BB1:
+ //     %t3 = add %t2, c
+ //     br label %BB2
+ // BB2:
+ //     %p = phi [ %t3, %BB1 ], [ %t2, %BB ]
+ // =>
+ // BB:
+ //     %t1 = icmp
+ //     %t4 = add %t2, c
+ //     %p = select i1 %t1, %t4, %t2
+ switch (HInst->getOpcode()) {
+ default: return false; // Not safe / profitable to hoist.
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Not worth doing for vector ops.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Don't mess with vector operations.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // If the instruction is obviously dead, don't try to predicate it.
+ if (HInst->use_empty()) {
+ HInst->eraseFromParent();
+ return true;
+ }
+
+ // Can we speculatively execute the instruction? And what is the value
+ // if the condition is false? Consider the phi uses: if the incoming values
+ // from the "if" block are all the same V, then V is the value of the
+ // select if the condition is false.
+ BasicBlock *BIParent = BI->getParent();
+ SmallVector<PHINode*, 4> PHIUses;
+ Value *FalseV = NULL;
+
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
+ UI != E; ++UI) {
+ // Bail out on any user that is not a PHI node in BB2. Such users could only
+ // occur in unreachable blocks, because they would not be dominated by the
+ // instruction.
+ PHINode *PN = dyn_cast<PHINode>(*UI);
+ if (!PN || PN->getParent() != BB2)
+ return false;
+ PHIUses.push_back(PN);
+
+ Value *PHIV = PN->getIncomingValueForBlock(BIParent);
+ if (!FalseV)
+ FalseV = PHIV;
+ else if (FalseV != PHIV)
+ return false; // Inconsistent value when condition is false.
+ }
+
+ assert(FalseV && "Must have at least one user, and it must be a PHI");
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+
+ // If we get here, we can hoist the instruction. Try to place it
+ // before the icmp instruction preceding the conditional branch.
+ BasicBlock::iterator InsertPos = BI;
+ if (InsertPos != BIParent->begin())
+ --InsertPos;
+ // Skip debug info between condition and branch.
+ while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
+ --InsertPos;
+ if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
+ SmallPtrSet<Instruction *, 4> BB1Insns;
+ for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
+ BB1I != BB1E; ++BB1I)
+ BB1Insns.insert(BB1I);
+ for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
+ UI != UE; ++UI) {
+ Instruction *Use = cast<Instruction>(*UI);
+ if (!BB1Insns.count(Use)) continue;
+
+ // If some instruction in BB1 uses BrCond, place HInst just before the
+ // branch instruction instead.
+ InsertPos = BI;
+ break;
+ }
+ } else
+ InsertPos = BI;
+ BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ IRBuilder<true, NoFolder> Builder(BI);
+ SelectInst *SI;
+ if (Invert)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, FalseV, HInst,
+ FalseV->getName() + "." + HInst->getName()));
+ else
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, HInst, FalseV,
+ HInst->getName() + "." + FalseV->getName()));
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
+ PHINode *PN = PHIUses[i];
+ for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
+ if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
+ PN->setIncomingValue(j, SI);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10) return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
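+///
+/// A hypothetical sketch: given
+///   bb:
+///     %p = phi i1 [ true, %pred1 ], [ %c, %pred2 ]
+///     br i1 %p, label %t, label %f
+/// the edge from %pred1 is rerouted through a new block that branches directly
+/// to %t, since the branch outcome is known to be true along that edge.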
+static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
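+///
+/// As an illustrative sketch (hypothetical names), a diamond of the form
+///   entry:  br i1 %c, label %then, label %else
+///   then:   br label %merge
+///   else:   br label %merge
+///   merge:  %p = phi i32 [ %a, %then ], [ %b, %else ]
+/// can be flattened so that %p becomes "select i1 %c, i32 %a, i32 %b" in the
+/// dominating block, provided the side blocks are cheap enough to speculate.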
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<true, NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator());
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ SelectInst *NV =
+ cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
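+///
+/// For illustration (hypothetical values): a branch to two blocks that return
+/// %a and %b respectively can be rewritten in the branch block as
+///   %retval = select i1 %cond, i32 %a, i32 %b
+///   ret i32 %retval
+/// with the select omitted when both sides return the same value or one side
+/// returns undef.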
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (just a return) or contain nothing
+ // but PHI nodes and the return. If there are other instructions, merging
+ // would cause extra computation on one path or the other.
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ Builder.SetInsertPoint(BI);
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ Builder.CreateRetVoid();
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // decided to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+ FalseValue, "retval");
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+ (void) RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
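+///
+/// An illustrative sketch (hypothetical names): with a predecessor ending in
+///   br i1 %pc, label %bb, label %common
+/// and this block being
+///   bb:
+///     %c = icmp eq i32 %x, 0
+///     br i1 %c, label %t, label %common
+/// both branches share %common on their false edges, so the predecessor can be
+/// rewritten to branch on "and i1 %pc, %c" with %c cloned into it.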
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->isSafeToSpeculativelyExecute()) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) {
+ // Collect the values used by the bonus inst
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value* V = *OI;
+ if (!isa<Constant>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false;
+ }
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ IRBuilder<> Builder(PBI);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = Builder.CreateNot(NewCond,
+ PBI->getCondition()->getName()+".not");
+ }
+
+ PBI->setCondition(NewCond);
+ BasicBlock *OldTrue = PBI->getSuccessor(0);
+ BasicBlock *OldFalse = PBI->getSuccessor(1);
+ PBI->setSuccessor(0, OldFalse);
+ PBI->setSuccessor(1, OldTrue);
+ }
+
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ Instruction *NewCond =
+ cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
+ New, "or.cond"));
+ PBI->setCondition(NewCond);
+ if (PBI->getSuccessor(0) == BB) {
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isa<DbgInfoIntrinsic>(*I))
+ I->clone()->insertBefore(PBI);
+
+ return true;
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
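+///
+/// A hypothetical sketch of the simplest case: if PBI is
+///   br i1 %c, label %bb, label %x
+/// and the branch BI in %bb is also conditional on %c, then %c is known to be
+/// true whenever %bb is entered from PBI, so BI's condition can be replaced
+/// with a constant (or with a PHI of constants when %bb has several
+/// predecessors).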
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessor is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<true, NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // OtherDest may have phi nodes. If so, add entries from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = cast<SelectInst>
+ (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB){
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+ // Then remove the rest.
+ for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = OldTerm->getSuccessor(I);
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = 0;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = 0;
+ else
+ Succ->removePredecessor(OldTerm->getParent());
+ }
+
+ IRBuilder<> Builder(OldTerm);
+ Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+ // Insert an appropriate new terminator.
+ if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ Builder.CreateBr(TrueBB);
+ else
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks was a successor, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (KeepEdge1 == 0)
+ // Only TrueBB was found.
+ Builder.CreateBr(TrueBB);
+ else
+ // Only FalseBB was found.
+ Builder.CreateBr(FalseBB);
+ }
+
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
+
+// SimplifySwitchOnSelect - Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
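+//
+// For example (hypothetical values):
+//   %v = select i1 %c, i32 1, i32 2
+//   switch i32 %v, label %def [ i32 1, label %a i32 2, label %b ]
+// becomes "br i1 %c, label %a, label %b".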
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+ // Check for constant integer values in the select.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Find the relevant condition and destinations.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
+ BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+}
+
+// SimplifyIndirectBrOnSelect - Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
+
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+}
+
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but the
+/// switch's default destination is then a block with a seteq in it that ends
+/// in an uncond branch, and we get something like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ const TargetData *TD,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != 0) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false and the new edge
+ // gets true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
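+///
+/// For example (illustrative IR, names invented):
+///   %a = icmp eq i32 %x, 0
+///   %b = icmp eq i32 %x, 1
+///   %c = or i1 %a, %b
+///   br i1 %c, label %T, label %F
+/// becomes roughly:
+///   switch i32 %x, label %F [ i32 0, label %T
+///                             i32 1, label %T ]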
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+ IRBuilder<> &Builder) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If we didn't find a value compared against multiple constants, fail.
+ if (CompVal == 0) return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle; remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If ExtraCase was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a conditional branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = Builder.CreatePtrToInt(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI, Builder))
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions.
+ BasicBlock *BB = UI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ bool Changed = false;
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
+ if (II && II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through.
+ Builder.SetInsertPoint(II);
+ BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now.
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+ Builder.SetInsertPoint(BI);
+ CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+ Args, II->getName());
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call now does instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead (and isn't the entry block), remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI != BB->begin()) {
+ BasicBlock::iterator BBI = UI;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction (any uses are guaranteed to be dead)
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI) return Changed;
+
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+ IRBuilder<> Builder(TI);
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ Builder.CreateBr(BI->getSuccessor(1));
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateBr(BI->getSuccessor(0));
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default value is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ std::pair<unsigned, unsigned>& entry =
+ Popularity[SI->getSuccessor(i)];
+ if (entry.first == 0) {
+ entry.first = 1;
+ entry.second = i;
+ } else {
+ entry.first++;
+ }
+ }
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ unsigned MaxIndex = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second.first > MaxPop ||
+ (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+ MaxPop = I->second.first;
+ MaxIndex = I->second.second;
+ MaxBlock = I->first;
+ }
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == MaxBlock) {
+ SI->removeCase(i);
+ --i; --e;
+ }
+ }
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ Builder.SetInsertPoint(BI);
+ CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+ Args, II->getName());
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch that contains only an
+/// integer range comparison into a sub, an icmp and a branch.
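+///
+/// For example, if the non-default cases 3, 4 and 5 all branch to %bb, the
+/// switch becomes roughly (names invented):
+///   %x.off = add i32 %x, -3
+///   %switch = icmp ult i32 %x.off, 3
+///   br i1 %switch, label %bb, label %default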
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+ // Make sure all cases point to the same destination and gather the values.
+ SmallVector<ConstantInt *, 16> Cases;
+ Cases.push_back(SI->getCaseValue(1));
+ for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+ if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ return false;
+ Cases.push_back(SI->getCaseValue(I));
+ }
+ assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+ // Sort the case values, then check if they form a range we can transform.
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+ return false;
+ }
+
+ Constant *Offset = ConstantExpr::getNeg(Cases.back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
+
+ // Prune obsolete incoming values off the successor's PHI nodes.
+ for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ return true;
+}
+
+/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
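+///
+/// For example, if ComputeMaskedBits proves that the low bit of the condition
+/// is always zero, then an odd case value such as 'i32 5' can never be taken
+/// and its case is removed (a sketch of the intent, not an exhaustive rule).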
+static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
+ ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+
+ // Gather dead cases.
+ SmallVector<ConstantInt*, 8> DeadCases;
+ for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
+ if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
+ (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(SI->getCaseValue(I));
+ DEBUG(dbgs() << "SimplifyCFG: switch case '"
+ << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+ }
+ }
+
+ // Remove dead cases from the switch.
+ for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
+ unsigned Case = SI->findCaseValue(DeadCases[I]);
+ // Prune unused values from PHI nodes.
+ SI->getSuccessor(Case)->removePredecessor(SI->getParent());
+ SI->removeCase(Case);
+ }
+
+ return !DeadCases.empty();
+}
+
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB,
+ int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return NULL; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return NULL; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return NULL; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue) continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
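+///
+/// For example (illustrative IR), given
+///   switch i32 %x, label %dflt [ i32 1, label %a  i32 2, label %b ]
+/// where %a and %b are empty blocks that branch to %merge and
+///   %p = phi i32 [ 1, %a ], [ 2, %b ], ...
+/// the incoming values match the case values, so both can be replaced with %x,
+/// leaving %a and %b trivially foldable.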
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+ ConstantInt *CaseValue = SI->getCaseValue(I);
+ BasicBlock *CaseDest = SI->getSuccessor(I);
+
+ int PhiIndex;
+ PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+ &PhiIndex);
+ if (!PHI) continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end(); I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVector<int,4> &Indexes = I->second;
+
+ if (Indexes.size() < 2) continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+ // If this switch is too complex to want to look at, ignore it.
+ if (!isValueEqualityComparison(SI))
+ return false;
+
+ BasicBlock *BB = SI->getParent();
+
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // Remove unreachable cases.
+ if (EliminateDeadSwitchCases(SI))
+ return SimplifyCFG(BB) | true;
+
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB) | true;
+ }
+ return Changed;
+}
+
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
+ BasicBlock *BB = BI->getParent();
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator()
+ && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+ return true;
+ }
+
+ return false;
+}
+
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg intrinsics.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB) | true;
+ }
+ }
+
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+ return true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (HoistThenElseCodeToIf(BI))
+ return SimplifyCFG(BB) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ return SimplifyCFG(BB) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, TD))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB, true);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ IRBuilder<> Builder(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TD);
+
+ Builder.SetInsertPoint(BB->getTerminator());
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI, Builder)) return true;
+ } else {
+ if (SimplifyCondBranch(BI, Builder)) return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI, Builder)) return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI, Builder)) return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI)) return true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (SimplifyUnwind(UI, Builder)) return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI)) return true;
+ }
+
+ return Changed;
+}
+
+/// SimplifyCFG - This function is used to do simplification of a CFG. For
+/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
+ return SimplifyCFGOpt(TD).run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..ac005f95b33a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
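+//
+// The pass is registered under the name "instsimplify" (see INITIALIZE_PASS at
+// the bottom of this file), so it can typically be exercised on its own with
+// something like: opt -instsimplify -S input.ll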
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ /// runOnFunction - Remove instructions that simplify.
+ bool runOnFunction(Function &F) {
+ const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+ for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ Instruction *I = BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+ false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 000000000000..46d4adaaa154
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,142 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there are no exit nodes in the CFG, the recorded
+// exit block will be a null pointer.
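+//
+// For example, a function with two returning blocks
+//   A: ret i32 %x
+//   B: ret i32 %y
+// is rewritten (roughly) so that both branch to a single block:
+//   UnifiedReturnBlock:
+//     %UnifiedRetVal = phi i32 [ %x, %A ], [ %y, %B ]
+//     ret i32 %UnifiedRetVal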
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// runOnFunction - Unify all exit nodes of the CFG by creating a new
+// BasicBlock and converting all returns to unconditional branches to this
+// new basic block. The singular exit node is recorded in ReturnBlock.
+//
+// If there are no return stmts in the Function, ReturnBlock is set to null.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnwindingBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (isa<ReturnInst>(I->getTerminator()))
+ ReturningBlocks.push_back(I);
+ else if (isa<UnwindInst>(I->getTerminator()))
+ UnwindingBlocks.push_back(I);
+ else if (isa<UnreachableInst>(I->getTerminator()))
+ UnreachableBlocks.push_back(I);
+
+ // Handle unwinding blocks first.
+ if (UnwindingBlocks.empty()) {
+ UnwindBlock = 0;
+ } else if (UnwindingBlocks.size() == 1) {
+ UnwindBlock = UnwindingBlocks.front();
+ } else {
+ UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+ new UnwindInst(F.getContext(), UnwindBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+ E = UnwindingBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unwind insn
+ BranchInst::Create(UnwindBlock, BB);
+ }
+ }
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = 0;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+ E = UnreachableBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = 0;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a PHI
+ // node (if the function returns a value), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = 0;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+ E = ReturningBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..24e8c8ff5c5f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
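+
+// A minimal usage sketch from C client code (assuming the client wants to
+// populate the global pass registry from llvm-c/Core.h):
+//   LLVMInitializeTransformUtils(LLVMGetGlobalPassRegistry());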
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..973b105a1cbb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,201 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Metadata.h"
+using namespace llvm;
+
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper) {
+ ValueToValueMapTy::iterator I = VM.find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != VM.end() && I->second) return I->second;
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V) || isa<MDString>(V))
+ return VM[V] = const_cast<Value*>(V);
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return VM[V] = const_cast<Value*>(V);
+ }
+
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ // If this is module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value*>(V);
+
+ // Create a dummy node in case we have a metadata cycle.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ VM[V] = Dummy;
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i) {
+ Value *Op = MD->getOperand(i);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
+ }
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts);
+ Dummy->replaceAllUsesWith(NewMD);
+ VM[V] = NewMD;
+ MDNode::deleteTemporary(Dummy);
+ return NewMD;
+ }
+
+ VM[V] = const_cast<Value*>(V);
+ MDNode::deleteTemporary(Dummy);
+
+ // No operands needed remapping. Use an identity mapping.
+ return const_cast<Value*>(V);
+ }
+
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (C == 0)
+ return 0;
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F =
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags, TypeMapper));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+ }
+
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = 0;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ if (Mapped != C) break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return VM[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands; set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+ // If one of the operands mismatch, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++OpNo; OpNo != NumOperands; ++OpNo)
+ Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+ Flags, TypeMapper));
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops, NewTy);
+ if (isa<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return VM[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return VM[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return VM[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+///
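+/// A typical caller (a cloning utility, sketched here with invented names)
+/// first records old-to-new mappings while copying instructions and then
+/// rewrites the copies, e.g.:
+///   VMap[&OldInst] = NewInst;                 // while cloning
+///   RemapInstruction(NewInst, VMap, RF_None); // afterwards, fix up operands
+///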
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ // Remap operands.
+ for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ *op = V;
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced value not in value map!");
+ }
+
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata. Don't bother remapping DebugLoc, it can never
+ // have mappings to do.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ MDNode *Old = MI->second;
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ if (New != Old)
+ I->setMetadata(MI->first, New);
+ }
+
+ // If the instruction's type is being remapped, do so now.
+ if (TypeMapper)
+ I->mutateType(TypeMapper->remapType(I->getType()));
+}
diff --git a/contrib/llvm/lib/VMCore/AsmWriter.cpp b/contrib/llvm/lib/VMCore/AsmWriter.cpp
new file mode 100644
index 000000000000..94794c35fe0b
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/AsmWriter.cpp
@@ -0,0 +1,2038 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because they can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+ if (const Argument *MA = dyn_cast<Argument>(V))
+ return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+ return M ? M->getParent() : 0;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return GV->getParent();
+ return 0;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
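+// For example, a name containing a double quote, say  he"llo , is printed as
+// he\22llo  (0x22 is the ASCII code of '"').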
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+enum PrefixType {
+ GlobalPrefix,
+ LabelPrefix,
+ LocalPrefix,
+ NoPrefix
+};
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+ assert(!Name.empty() && "Cannot get empty name!");
+ switch (Prefix) {
+ default: llvm_unreachable("Bad prefix!");
+ case NoPrefix: break;
+ case GlobalPrefix: OS << '@'; break;
+ case LabelPrefix: break;
+ case LocalPrefix: OS << '%'; break;
+ }
+
+ // Scan the name to see if it needs quotes first.
+ bool NeedsQuotes = isdigit(Name[0]);
+ if (!NeedsQuotes) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ char C = Name[i];
+ if (!isalnum(C) && C != '-' && C != '.' && C != '_') {
+ NeedsQuotes = true;
+ break;
+ }
+ }
+ }
+
+ // If we didn't need any quotes, just write out the name in one blast.
+ if (!NeedsQuotes) {
+ OS << Name;
+ return;
+ }
+
+ // Okay, we need quotes. Output the quotes and escape any scary characters as
+ // needed.
+ OS << '"';
+ PrintEscapedString(Name, OS);
+ OS << '"';
+}
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, const Value *V) {
+ PrintLLVMName(OS, V->getName(),
+ isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
+}
+
+//===----------------------------------------------------------------------===//
+// TypePrinting Class: Type printing machinery
+//===----------------------------------------------------------------------===//
+
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &); // DO NOT IMPLEMENT
+ void operator=(const TypePrinting&); // DO NOT IMPLEMENT
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ std::vector<StructType*> NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
+
+
+void TypePrinting::incorporateTypes(const Module &M) {
+ M.findUsedStructTypes(NamedTypes);
+
+ // The list of struct types we got back includes all the struct types; split
+ // the unnamed ones out to a numbering and remove the anonymous structs.
+ unsigned NextNumber = 0;
+
+ std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+ for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+ StructType *STy = *I;
+
+ // Ignore anonymous types.
+ if (STy->isAnonymous())
+ continue;
+
+ if (STy->getName().empty())
+ NumberedTypes[STy] = NextNumber++;
+ else
+ *NextToUse++ = STy;
+ }
+
+ NamedTypes.erase(NextToUse, NamedTypes.end());
+}
+
+
+/// print - Write the specified type to the specified raw_ostream, making use
+/// of type names to shorten the type name where possible.
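+///
+/// For example, a pointer to a function that takes an i8* and returns i32
+/// prints as  i32 (i8*)* , and a named struct prints by its name, e.g.
+/// %struct.foo .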
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: OS << "void"; break;
+ case Type::FloatTyID: OS << "float"; break;
+ case Type::DoubleTyID: OS << "double"; break;
+ case Type::X86_FP80TyID: OS << "x86_fp80"; break;
+ case Type::FP128TyID: OS << "fp128"; break;
+ case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
+ case Type::LabelTyID: OS << "label"; break;
+ case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
+ case Type::IntegerTyID:
+ OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
+ return;
+
+ case Type::FunctionTyID: {
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ print(FTy->getReturnType(), OS);
+ OS << " (";
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ OS << ", ";
+ print(*I, OS);
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams()) OS << ", ";
+ OS << "...";
+ }
+ OS << ')';
+ return;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+
+ if (STy->isAnonymous())
+ return printStructBody(STy, OS);
+
+ if (!STy->getName().empty())
+ return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+
+ DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+ if (I != NumberedTypes.end())
+ OS << '%' << I->second;
+ else // Not enumerated, print the hex address.
+ OS << "%\"type 0x" << STy << '\"';
+ return;
+ }
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(Ty);
+ print(PTy->getElementType(), OS);
+ if (unsigned AddressSpace = PTy->getAddressSpace())
+ OS << " addrspace(" << AddressSpace << ')';
+ OS << '*';
+ return;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ OS << '[' << ATy->getNumElements() << " x ";
+ print(ATy->getElementType(), OS);
+ OS << ']';
+ return;
+ }
+ case Type::VectorTyID: {
+ VectorType *PTy = cast<VectorType>(Ty);
+ OS << "<" << PTy->getNumElements() << " x ";
+ print(PTy->getElementType(), OS);
+ OS << '>';
+ return;
+ }
+ default:
+ OS << "<unrecognized-type>";
+ return;
+ }
+}
+
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+ if (STy->isOpaque()) {
+ OS << "opaque";
+ return;
+ }
+
+ if (STy->isPacked())
+ OS << '<';
+
+ if (STy->getNumElements() == 0) {
+ OS << "{}";
+ } else {
+ StructType::element_iterator I = STy->element_begin();
+ OS << "{ ";
+ print(*I++, OS);
+ for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+ OS << ", ";
+ print(*I, OS);
+ }
+
+ OS << " }";
+ }
+ if (STy->isPacked())
+ OS << '>';
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SlotTracker Class: Enumerate slot numbers for unnamed values
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class provides computation of slot numbers for LLVM Assembly writing.
+///
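+/// For example, unnamed local values print as %0, %1, ..., unnamed globals as
+/// @0, @1, ..., and metadata nodes as !0, !1, ...; the slot numbers computed
+/// here are exactly those numbers.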
+class SlotTracker {
+public:
+ /// ValueMap - A mapping of Values to slot numbers.
+ typedef DenseMap<const Value*, unsigned> ValueMap;
+
+private:
+ /// TheModule - The module for which we are holding slot numbers.
+ const Module* TheModule;
+
+ /// TheFunction - The function for which we are holding slot numbers.
+ const Function* TheFunction;
+ bool FunctionProcessed;
+
+ /// mMap - The slot map for the module level data.
+ ValueMap mMap;
+ unsigned mNext;
+
+ /// fMap - The slot map for the function level data.
+ ValueMap fMap;
+ unsigned fNext;
+
+ /// mdnMap - Map for MDNodes.
+ DenseMap<const MDNode*, unsigned> mdnMap;
+ unsigned mdnNext;
+public:
+ /// Construct from a module
+ explicit SlotTracker(const Module *M);
+ /// Construct from a function, starting out in incorp state.
+ explicit SlotTracker(const Function *F);
+
+ /// Return the slot number of the specified value in its type
+ /// plane. If something is not in the SlotTracker, return -1.
+ int getLocalSlot(const Value *V);
+ int getGlobalSlot(const GlobalValue *V);
+ int getMetadataSlot(const MDNode *N);
+
+ /// If you'd like to deal with a function instead of just a module, use
+ /// this method to get its data into the SlotTracker.
+ void incorporateFunction(const Function *F) {
+ TheFunction = F;
+ FunctionProcessed = false;
+ }
+
+ /// After calling incorporateFunction, use this method to remove the
+ /// most recently incorporated function from the SlotTracker. This
+ /// will reset the state of the machine back to just the module contents.
+ void purgeFunction();
+
+ /// MDNode map iterators.
+ typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+ mdn_iterator mdn_begin() { return mdnMap.begin(); }
+ mdn_iterator mdn_end() { return mdnMap.end(); }
+ unsigned mdn_size() const { return mdnMap.size(); }
+ bool mdn_empty() const { return mdnMap.empty(); }
+
+ /// This function does the actual initialization.
+ inline void initialize();
+
+ // Implementation Details
+private:
+ /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+ void CreateModuleSlot(const GlobalValue *V);
+
+ /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+ void CreateMetadataSlot(const MDNode *N);
+
+ /// CreateFunctionSlot - Insert the specified Value* into the slot table.
+ void CreateFunctionSlot(const Value *V);
+
+ /// Add all of the module level global variables (and their initializers)
+ /// and function declarations, but not the contents of those functions.
+ void processModule();
+
+  /// Add all of the function's arguments, basic blocks, and instructions.
+ void processFunction();
+
+ SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT
+ void operator=(const SlotTracker &); // DO NOT IMPLEMENT
+};
+
+} // end anonymous namespace
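+
+// A minimal usage sketch (F and Inst are hypothetical values); this mirrors
+// how AssemblyWriter drives the tracker further down:
+//
+//   SlotTracker Machine(F->getParent());    // module-level slots only
+//   Machine.incorporateFunction(F);         // switch to function state
+//   int Slot = Machine.getLocalSlot(Inst);  // -1 if Inst is named/untracked
+//   Machine.purgeFunction();                // back to module-only state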
+
+
+static SlotTracker *createSlotTracker(const Value *V) {
+ if (const Argument *FA = dyn_cast<Argument>(V))
+ return new SlotTracker(FA->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return new SlotTracker(I->getParent()->getParent());
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return new SlotTracker(BB->getParent());
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return new SlotTracker(GV->getParent());
+
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return new SlotTracker(GA->getParent());
+
+ if (const Function *Func = dyn_cast<Function>(V))
+ return new SlotTracker(Func);
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
+ return new SlotTracker((Function *)0);
+ }
+
+ return 0;
+}
+
+#if 0
+#define ST_DEBUG(X) dbgs() << X
+#else
+#define ST_DEBUG(X)
+#endif
+
+// Module level constructor. Causes the contents of the Module (sans functions)
+// to be added to the slot table.
+SlotTracker::SlotTracker(const Module *M)
+ : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0) {
+}
+
+// Function level constructor. Causes the contents of the Module and the one
+// function provided to be added to the slot table.
+SlotTracker::SlotTracker(const Function *F)
+ : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0) {
+}
+
+inline void SlotTracker::initialize() {
+ if (TheModule) {
+ processModule();
+ TheModule = 0; ///< Prevent re-processing next time we're called.
+ }
+
+ if (TheFunction && !FunctionProcessed)
+ processFunction();
+}
+
+// Iterate through all the global variables, functions, and global
+// variable initializers and create slots for them.
+void SlotTracker::processModule() {
+ ST_DEBUG("begin processModule!\n");
+
+ // Add all of the unnamed global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (!I->hasName())
+ CreateModuleSlot(I);
+ }
+
+ // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator
+ I = TheModule->named_metadata_begin(),
+ E = TheModule->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
+ }
+
+ // Add all the unnamed functions to the table.
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ if (!I->hasName())
+ CreateModuleSlot(I);
+
+ ST_DEBUG("end processModule!\n");
+}
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+ ST_DEBUG("begin processFunction!\n");
+ fNext = 0;
+
+ // Add all the function arguments with no names.
+ for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
+ AE = TheFunction->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ CreateFunctionSlot(AI);
+
+ ST_DEBUG("Inserting Instructions:\n");
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Add all of the basic blocks and instructions with no names.
+ for (Function::const_iterator BB = TheFunction->begin(),
+ E = TheFunction->end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ CreateFunctionSlot(BB);
+
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ if (!I->getType()->isVoidTy() && !I->hasName())
+ CreateFunctionSlot(I);
+
+ // Intrinsics can directly use metadata. We allow direct calls to any
+ // llvm.foo function here, because the target may not be linked into the
+ // optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction())
+ if (F->getName().startswith("llvm."))
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+ }
+
+      // Process metadata attached to this instruction.
+ I->getAllMetadata(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ CreateMetadataSlot(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ FunctionProcessed = true;
+
+ ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+ ST_DEBUG("begin purgeFunction!\n");
+ fMap.clear(); // Simply discard the function level map
+ TheFunction = 0;
+ FunctionProcessed = false;
+ ST_DEBUG("end purgeFunction!\n");
+}
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the value in the module map
+ ValueMap::iterator MI = mMap.find(V);
+ return MI == mMap.end() ? -1 : (int)MI->second;
+}
+
+/// getMetadataSlot - Get the slot number of a MDNode.
+int SlotTracker::getMetadataSlot(const MDNode *N) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the MDNode in the module map
+ mdn_iterator MI = mdnMap.find(N);
+ return MI == mdnMap.end() ? -1 : (int)MI->second;
+}
+
+
+/// getLocalSlot - Get the slot number for a value that is local to a function.
+int SlotTracker::getLocalSlot(const Value *V) {
+ assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
+
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ ValueMap::iterator FI = fMap.find(V);
+ return FI == fMap.end() ? -1 : (int)FI->second;
+}
+
+
+/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
+ assert(V && "Can't insert a null Value into SlotTracker!");
+ assert(!V->getType()->isVoidTy() && "Doesn't need a slot!");
+ assert(!V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = mNext++;
+ mMap[V] = DestSlot;
+
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [");
+ // G = Global, F = Function, A = Alias, o = other
+ ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
+ (isa<Function>(V) ? 'F' :
+ (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
+}
+
+/// CreateFunctionSlot - Create a new slot for the specified value if it has
+/// no name.
+void SlotTracker::CreateFunctionSlot(const Value *V) {
+ assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = fNext++;
+ fMap[V] = DestSlot;
+
+ // G = Global, F = Function, o = other
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [o]\n");
+}
+
+/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) {
+ assert(N && "Can't insert a null Value into SlotTracker!");
+
+  // Don't insert if N is function-local metadata; such nodes are always
+  // printed inline.
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
+
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
+
+ // Recursively add any MDNodes referenced by operands.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
+ CreateMetadataSlot(Op);
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+ const char * pred = "unknown";
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: pred = "false"; break;
+ case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
+ case FCmpInst::FCMP_OGT: pred = "ogt"; break;
+ case FCmpInst::FCMP_OGE: pred = "oge"; break;
+ case FCmpInst::FCMP_OLT: pred = "olt"; break;
+ case FCmpInst::FCMP_OLE: pred = "ole"; break;
+ case FCmpInst::FCMP_ONE: pred = "one"; break;
+ case FCmpInst::FCMP_ORD: pred = "ord"; break;
+ case FCmpInst::FCMP_UNO: pred = "uno"; break;
+ case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
+ case FCmpInst::FCMP_UGT: pred = "ugt"; break;
+ case FCmpInst::FCMP_UGE: pred = "uge"; break;
+ case FCmpInst::FCMP_ULT: pred = "ult"; break;
+ case FCmpInst::FCMP_ULE: pred = "ule"; break;
+ case FCmpInst::FCMP_UNE: pred = "une"; break;
+ case FCmpInst::FCMP_TRUE: pred = "true"; break;
+ case ICmpInst::ICMP_EQ: pred = "eq"; break;
+ case ICmpInst::ICMP_NE: pred = "ne"; break;
+ case ICmpInst::ICMP_SGT: pred = "sgt"; break;
+ case ICmpInst::ICMP_SGE: pred = "sge"; break;
+ case ICmpInst::ICMP_SLT: pred = "slt"; break;
+ case ICmpInst::ICMP_SLE: pred = "sle"; break;
+ case ICmpInst::ICMP_UGT: pred = "ugt"; break;
+ case ICmpInst::ICMP_UGE: pred = "uge"; break;
+ case ICmpInst::ICMP_ULT: pred = "ult"; break;
+ case ICmpInst::ICMP_ULE: pred = "ule"; break;
+ }
+ return pred;
+}
+
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(U)) {
+ if (OBO->hasNoUnsignedWrap())
+ Out << " nuw";
+ if (OBO->hasNoSignedWrap())
+ Out << " nsw";
+ } else if (const PossiblyExactOperator *Div =
+ dyn_cast<PossiblyExactOperator>(U)) {
+ if (Div->isExact())
+ Out << " exact";
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+ if (GEP->isInBounds())
+ Out << " inbounds";
+ }
+}
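+
+// For instance (illustrative only), an add with both wrap flags set ends up
+// printed as "add nuw nsw", an exact sdiv as "sdiv exact", and an inbounds
+// GEP as "getelementptr inbounds", since these keywords are appended right
+// after the opcode by the callers below.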
+
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (CI->getType()->isIntegerTy(1)) {
+ Out << (CI->getZExtValue() ? "true" : "false");
+ return;
+ }
+ Out << CI->getValue();
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ // We would like to output the FP constant value in exponential notation,
+ // but we cannot do this if doing so will lose precision. Check here to
+ // make sure that we only output it in exponential format if we can parse
+ // the value back and get the same value.
+ //
+ bool ignored;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+      // Check that the stringized number is not some string like "Inf" or
+      // "NaN" that atof will accept but the lexer will not, i.e. that the
+      // string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (atof(StrVal.c_str()) == Val) {
+ Out << StrVal.str();
+ return;
+ }
+ }
+ // Otherwise we could not reparse it to exactly the same value, so we must
+ // output the string in hexadecimal format! Note that loading and storing
+ // floating point types changes the bits of NaNs on some hosts, notably
+ // x86, so we must not use these types.
+ assert(sizeof(double) == sizeof(uint64_t) &&
+ "assuming that double is 64 bits!");
+ char Buffer[40];
+ APFloat apf = CFP->getValueAPF();
+ // Floats are represented in ASCII IR as double, convert.
+ if (!isDouble)
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Buffer+40);
+ return;
+ }
+
+ // Some form of long double. These appear as a magic letter identifying
+ // the type, then a fixed number of hex digits.
+ Out << "0x";
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ Out << 'K';
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = p[1];
+ int shiftcount=12;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *p;
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad)
+ Out << 'L';
+ else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble)
+ Out << 'M';
+ else
+ llvm_unreachable("Unsupported floating point type");
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = *p;
+ int shiftcount=60;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *(++p);
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ }
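+
+  // Rough examples of the forms produced above (values chosen purely for
+  // illustration): a double such as 1.5 that survives the decimal round-trip
+  // typically comes out as "1.500000e+00"; one that does not is printed as
+  // its raw IEEE bits, e.g. "0x3FF8000000000001"; an x86 long double is
+  // prefixed with 'K', an IEEE quad with 'L', and a PPC double-double
+  // with 'M'.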
+
+ if (isa<ConstantAggregateZero>(CV)) {
+ Out << "zeroinitializer";
+ return;
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "blockaddress(";
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
+ Out << ", ";
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
+ Out << ")";
+ return;
+ }
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ // As a special case, print the array as a string if it is an array of
+ // i8 with ConstantInt values.
+ //
+ Type *ETy = CA->getType()->getElementType();
+ if (CA->isString()) {
+ Out << "c\"";
+ PrintEscapedString(CA->getAsString(), Out);
+ Out << '"';
+ } else { // Cannot output in string format...
+ Out << '[';
+ if (CA->getNumOperands()) {
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ }
+ Out << ']';
+ }
+ return;
+ }
+
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ if (CS->getType()->isPacked())
+ Out << '<';
+ Out << '{';
+ unsigned N = CS->getNumOperands();
+ if (N) {
+ Out << ' ';
+ TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
+
+ for (unsigned i = 1; i < N; i++) {
+ Out << ", ";
+ TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ' ';
+ }
+
+ Out << '}';
+ if (CS->getType()->isPacked())
+ Out << '>';
+ return;
+ }
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Type *ETy = CP->getType()->getElementType();
+ assert(CP->getNumOperands() > 0 &&
+ "Number of operands for a PackedConst must be > 0");
+ Out << '<';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(CV)) {
+ Out << "null";
+ return;
+ }
+
+ if (isa<UndefValue>(CV)) {
+ Out << "undef";
+ return;
+ }
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ Out << CE->getOpcodeName();
+ WriteOptimizationInfo(Out, CE);
+ if (CE->isCompare())
+ Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << " (";
+
+ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ TypePrinter.print((*OI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ if (OI+1 != CE->op_end())
+ Out << ", ";
+ }
+
+ if (CE->hasIndices()) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ Out << ", " << Indices[i];
+ }
+
+ if (CE->isCast()) {
+ Out << " to ";
+ TypePrinter.print(CE->getType(), Out);
+ }
+
+ Out << ')';
+ return;
+ }
+
+ Out << "<placeholder or erroneous Constant>";
+}
+
+static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!{";
+ for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
+ const Value *V = Node->getOperand(mi);
+ if (V == 0)
+ Out << "null";
+ else {
+ TypePrinter->print(V->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, Node->getOperand(mi),
+ TypePrinter, Machine, Context);
+ }
+ if (mi + 1 != me)
+ Out << ", ";
+ }
+
+ Out << "}";
+}
+
+
+/// WriteAsOperandInternal - Write the name of the specified value out to the
+/// specified ostream.  This can be useful when you just want to print
+/// "i32 %reg126", not the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (V->hasName()) {
+ PrintLLVMName(Out, V);
+ return;
+ }
+
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (CV && !isa<GlobalValue>(CV)) {
+ assert(TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Out << "asm ";
+ if (IA->hasSideEffects())
+ Out << "sideeffect ";
+ if (IA->isAlignStack())
+ Out << "alignstack ";
+ Out << '"';
+ PrintEscapedString(IA->getAsmString(), Out);
+ Out << "\", \"";
+ PrintEscapedString(IA->getConstraintString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ if (N->isFunctionLocal()) {
+ // Print metadata inline, not via slot reference number.
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ return;
+ }
+
+ if (const MDString *MDS = dyn_cast<MDString>(V)) {
+ Out << "!\"";
+ PrintEscapedString(MDS->getString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
+ V->print(Out);
+ return;
+ }
+
+ char Prefix = '%';
+ int Slot;
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ } else {
+ Machine = createSlotTracker(V);
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ delete Machine;
+ } else {
+ Slot = -1;
+ }
+ }
+
+ if (Slot != -1)
+ Out << Prefix << Slot;
+ else
+ Out << "<badref>";
+}
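+
+// To summarize the forms above (names and slot numbers are hypothetical):
+// named values print as "%foo" or "@foo", unnamed values print by slot as
+// "%3" or "@0", non-function-local metadata prints as "!7", metadata strings
+// print as !"...", inline asm prints as asm "...", "...", and a value with
+// no slot prints as "<badref>".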
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+ bool PrintType, const Module *Context) {
+
+ // Fast path: Don't construct and populate a TypePrinting object if we
+ // won't be needing any types printed.
+ if (!PrintType &&
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
+ return;
+ }
+
+ if (Context == 0) Context = getModuleFromVal(V);
+
+ TypePrinting TypePrinter;
+ if (Context)
+ TypePrinter.incorporateTypes(*Context);
+ if (PrintType) {
+ TypePrinter.print(V->getType(), Out);
+ Out << ' ';
+ }
+
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
+}
+
+namespace {
+
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ SlotTracker &Machine;
+ const Module *TheModule;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+
+public:
+ inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M,
+ AssemblyAnnotationWriter *AAW)
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+ }
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, Attributes Attrs);
+
+ void writeAllMDNodes();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, Attributes Attrs);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstruction(const Instruction &I);
+
+private:
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // end of anonymous namespace
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+ if (PrintType) {
+ TypePrinter.print(Operand->getType(), Out);
+ Out << ' ';
+ }
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+ Attributes Attrs) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+
+ // Print the type
+ TypePrinter.print(Operand->getType(), Out);
+ // Print parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+ Out << ' ';
+ // Print the operand
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+ if (!M->getModuleIdentifier().empty() &&
+ // Don't print the ID if it will start a new line (which would
+ // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos)
+ Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+
+ if (!M->getDataLayout().empty())
+ Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
+ if (!M->getTargetTriple().empty())
+ Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
+
+ if (!M->getModuleInlineAsm().empty()) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M->getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ Out << '\n';
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ std::string rest(Asm.begin()+CurPos, Asm.end());
+ if (!rest.empty()) {
+ Out << "module asm \"";
+ PrintEscapedString(rest, Out);
+ Out << "\"\n";
+ }
+ }
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = M->lib_begin();
+ Module::lib_iterator LE = M->lib_end();
+ if (LI != LE) {
+ Out << '\n';
+ Out << "deplibs = [ ";
+ while (LI != LE) {
+ Out << '"' << *LI << '"';
+ ++LI;
+ if (LI != LE)
+ Out << ", ";
+ }
+ Out << " ]";
+ }
+
+ printTypeIdentities();
+
+ // Output all globals.
+ if (!M->global_empty()) Out << '\n';
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ printGlobal(I);
+
+ // Output all aliases.
+ if (!M->alias_empty()) Out << "\n";
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ printAlias(I);
+
+ // Output all of the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ printFunction(I);
+
+ // Output named metadata.
+ if (!M->named_metadata_empty()) Out << '\n';
+
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ printNamedMDNode(I);
+
+ // Output metadata.
+ if (!Machine.mdn_empty()) {
+ Out << '\n';
+ writeAllMDNodes();
+ }
+}
+
+void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
+ Out << '!';
+ StringRef Name = NMD->getName();
+ if (Name.empty()) {
+ Out << "<empty name> ";
+ } else {
+ if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' ||
+ Name[0] == '.' || Name[0] == '_')
+ Out << Name[0];
+ else
+ Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+ }
+ Out << " = !{";
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ if (i) Out << ", ";
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ }
+ Out << "}\n";
+}
+
+
+static void PrintLinkage(GlobalValue::LinkageTypes LT,
+ formatted_raw_ostream &Out) {
+ switch (LT) {
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "linker_private_weak_def_auto ";
+ break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "available_externally ";
+ break;
+ }
+}
+
+
+static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
+ formatted_raw_ostream &Out) {
+ switch (Vis) {
+ case GlobalValue::DefaultVisibility: break;
+ case GlobalValue::HiddenVisibility: Out << "hidden "; break;
+ case GlobalValue::ProtectedVisibility: Out << "protected "; break;
+ }
+}
+
+void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
+ if (GV->isMaterializable())
+ Out << "; Materializable\n";
+
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ Out << " = ";
+
+ if (!GV->hasInitializer() && GV->hasExternalLinkage())
+ Out << "external ";
+
+ PrintLinkage(GV->getLinkage(), Out);
+ PrintVisibility(GV->getVisibility(), Out);
+
+ if (GV->isThreadLocal()) Out << "thread_local ";
+ if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
+ Out << (GV->isConstant() ? "constant " : "global ");
+ TypePrinter.print(GV->getType()->getElementType(), Out);
+
+ if (GV->hasInitializer()) {
+ Out << ' ';
+ writeOperand(GV->getInitializer(), false);
+ }
+
+ if (GV->hasSection()) {
+ Out << ", section \"";
+ PrintEscapedString(GV->getSection(), Out);
+ Out << '"';
+ }
+ if (GV->getAlignment())
+ Out << ", align " << GV->getAlignment();
+
+ printInfoComment(*GV);
+ Out << '\n';
+}
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
+ Out << "; Materializable\n";
+
+ // Don't crash when dumping partially built GA
+ if (!GA->hasName())
+ Out << "<<nameless>> = ";
+ else {
+ PrintLLVMName(Out, GA);
+ Out << " = ";
+ }
+ PrintVisibility(GA->getVisibility(), Out);
+
+ Out << "alias ";
+
+ PrintLinkage(GA->getLinkage(), Out);
+
+ const Constant *Aliasee = GA->getAliasee();
+
+ if (Aliasee == 0) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << " <<NULL ALIASEE>>";
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+ TypePrinter.print(GV->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GV);
+ } else if (const Function *F = dyn_cast<Function>(Aliasee)) {
+ TypePrinter.print(F->getFunctionType(), Out);
+ Out << "* ";
+
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GA);
+ } else {
+ const ConstantExpr *CE = cast<ConstantExpr>(Aliasee);
+ // The only valid GEP is an all zero GEP.
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+ writeOperand(CE, false);
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printTypeIdentities() {
+ if (TypePrinter.NumberedTypes.empty() &&
+ TypePrinter.NamedTypes.empty())
+ return;
+
+ Out << '\n';
+
+  // We know every number that is used and that the numbering is dense, so
+  // convert the map to an index table.
+ std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+ for (DenseMap<StructType*, unsigned>::iterator I =
+ TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+ I != E; ++I) {
+ assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+ NumberedTypes[I->second] = I->first;
+ }
+
+ // Emit all numbered types.
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+ Out << '%' << i << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %2 = type %2
+ TypePrinter.printStructBody(NumberedTypes[i], Out);
+ Out << '\n';
+ }
+
+ for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+ PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
+ Out << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %FILE = type %FILE
+ TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
+ Out << '\n';
+ }
+}
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+ // Print out the return type and name.
+ Out << '\n';
+
+ if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+ if (F->isMaterializable())
+ Out << "; Materializable\n";
+
+ if (F->isDeclaration())
+ Out << "declare ";
+ else
+ Out << "define ";
+
+ PrintLinkage(F->getLinkage(), Out);
+ PrintVisibility(F->getVisibility(), Out);
+
+ // Print the calling convention.
+ switch (F->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << "fastcc "; break;
+ case CallingConv::Cold: Out << "coldcc "; break;
+ case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel "; break;
+ case CallingConv::PTX_Device: Out << "ptx_device "; break;
+ default: Out << "cc" << F->getCallingConv() << " "; break;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ const AttrListPtr &Attrs = F->getAttributes();
+ Attributes RetAttrs = Attrs.getRetAttributes();
+ if (RetAttrs != Attribute::None)
+ Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
+ TypePrinter.print(F->getReturnType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ Out << '(';
+ Machine.incorporateFunction(F);
+
+ // Loop over the arguments, printing them...
+
+ unsigned Idx = 1;
+ if (!F->isDeclaration()) {
+ // If this isn't a declaration, print the argument names as well.
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (I != F->arg_begin()) Out << ", ";
+ printArgument(I, Attrs.getParamAttributes(Idx));
+ Idx++;
+ }
+ } else {
+ // Otherwise, print the types from the function type.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (i) Out << ", ";
+
+ // Output type...
+ TypePrinter.print(FT->getParamType(i), Out);
+
+ Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
+ if (ArgAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(ArgAttrs);
+ }
+ }
+
+ // Finish printing arguments...
+ if (FT->isVarArg()) {
+ if (FT->getNumParams()) Out << ", ";
+ Out << "..."; // Output varargs portion of signature!
+ }
+ Out << ')';
+ if (F->hasUnnamedAddr())
+ Out << " unnamed_addr";
+ Attributes FnAttrs = Attrs.getFnAttributes();
+ if (FnAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
+ if (F->hasSection()) {
+ Out << " section \"";
+ PrintEscapedString(F->getSection(), Out);
+ Out << '"';
+ }
+ if (F->getAlignment())
+ Out << " align " << F->getAlignment();
+ if (F->hasGC())
+ Out << " gc \"" << F->getGC() << '"';
+ if (F->isDeclaration()) {
+ Out << '\n';
+ } else {
+ Out << " {";
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+ printBasicBlock(I);
+
+ Out << "}\n";
+ }
+
+ Machine.purgeFunction();
+}
+
+/// printArgument - This member is called for every argument that is passed
+/// into the function.  Simply print it out.
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+ Attributes Attrs) {
+ // Output type...
+ TypePrinter.print(Arg->getType(), Out);
+
+ // Output parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+
+ // Output name, if available...
+ if (Arg->hasName()) {
+ Out << ' ';
+ PrintLLVMName(Out, Arg);
+ }
+}
+
+/// printBasicBlock - This member is called for each basic block in a method.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ if (BB->hasName()) { // Print out the label if it exists...
+ Out << "\n";
+ PrintLLVMName(Out, BB->getName(), LabelPrefix);
+ Out << ':';
+  } else if (!BB->use_empty()) {      // Don't print block # if it has no uses...
+ Out << "\n; <label>:";
+ int Slot = Machine.getLocalSlot(BB);
+ if (Slot != -1)
+ Out << Slot;
+ else
+ Out << "<badref>";
+ }
+
+ if (BB->getParent() == 0) {
+ Out.PadToColumn(50);
+ Out << "; Error: Block without parent!";
+ } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ // Output predecessors for the block.
+ Out.PadToColumn(50);
+ Out << ";";
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ if (PI == PE) {
+ Out << " No predecessors!";
+ } else {
+ Out << " preds = ";
+ writeOperand(*PI, false);
+ for (++PI; PI != PE; ++PI) {
+ Out << ", ";
+ writeOperand(*PI, false);
+ }
+ }
+ }
+
+ Out << "\n";
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ printInstruction(*I);
+ Out << '\n';
+ }
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
+}
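+
+// For an unnamed block the printed header is roughly "; <label>:5", with the
+// predecessor comment padded to column 50, e.g. "; preds = %entry, %if.then"
+// (the block names and slot number here are hypothetical).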
+
+/// printInfoComment - Print a little comment after the instruction indicating
+/// which slot it occupies.
+///
+void AssemblyWriter::printInfoComment(const Value &V) {
+ if (AnnotationWriter) {
+ AnnotationWriter->printInfoComment(V, Out);
+ return;
+ }
+}
+
+// This member is called for each Instruction in a function.
+void AssemblyWriter::printInstruction(const Instruction &I) {
+ if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
+
+ // Print out indentation for an instruction.
+ Out << " ";
+
+ // Print out name if it exists...
+ if (I.hasName()) {
+ PrintLLVMName(Out, &I);
+ Out << " = ";
+ } else if (!I.getType()->isVoidTy()) {
+ // Print out the def slot taken.
+ int SlotNum = Machine.getLocalSlot(&I);
+ if (SlotNum == -1)
+ Out << "<badref> = ";
+ else
+ Out << '%' << SlotNum << " = ";
+ }
+
+ // If this is a volatile load or store, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())) {
+ Out << "volatile ";
+ } else if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) {
+ // If this is a call, check if it's a tail call.
+ Out << "tail ";
+ }
+
+ // Print out the opcode...
+ Out << I.getOpcodeName();
+
+ // Print out optimization information.
+ WriteOptimizationInfo(Out, &I);
+
+ // Print out the compare instruction predicates
+ if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
+ Out << ' ' << getPredicateText(CI->getPredicate());
+
+ // Print out the type of the operands...
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+
+ // Special case conditional branches to swizzle the condition out to the front
+ if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
+ BranchInst &BI(cast<BranchInst>(I));
+ Out << ' ';
+ writeOperand(BI.getCondition(), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(0), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(1), true);
+
+ } else if (isa<SwitchInst>(I)) {
+ // Special case switch instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand , true);
+ Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ Out << " [";
+
+ for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ Out << "\n ";
+ writeOperand(I.getOperand(op ), true);
+ Out << ", ";
+ writeOperand(I.getOperand(op+1), true);
+ }
+ Out << "\n ]";
+ } else if (isa<IndirectBrInst>(I)) {
+ // Special case indirectbr instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand, true);
+ Out << ", [";
+
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ if (i != 1)
+ Out << ", ";
+ writeOperand(I.getOperand(i), true);
+ }
+ Out << ']';
+ } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << ' ';
+
+ for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
+ if (op) Out << ", ";
+ Out << "[ ";
+ writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+ writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true); Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (isa<ReturnInst>(I) && !Operand) {
+ Out << " void";
+ } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Print the calling convention being used.
+ switch (CI->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
+ default: Out << " cc" << CI->getCallingConv(); break;
+ }
+
+ Operand = CI->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = CI->getAttributes();
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the call instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
+ Out << ", ";
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
+ }
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ Operand = II->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = II->getAttributes();
+
+ // Print the calling convention being used.
+ switch (II->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
+ default: Out << " cc" << II->getCallingConv(); break;
+ }
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the invoke instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ if (op)
+ Out << ", ";
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
+ }
+
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+
+ Out << "\n to ";
+ writeOperand(II->getNormalDest(), true);
+ Out << " unwind ";
+ writeOperand(II->getUnwindDest(), true);
+
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(AI->getType()->getElementType(), Out);
+ if (!AI->getArraySize() || AI->isArrayAllocation()) {
+ Out << ", ";
+ writeOperand(AI->getArraySize(), true);
+ }
+ if (AI->getAlignment()) {
+ Out << ", align " << AI->getAlignment();
+ }
+ } else if (isa<CastInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << " to ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (isa<VAArgInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << ", ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (Operand) { // Print the normal way.
+
+    // PrintAllTypes - Instructions whose operands all have the same type
+    // omit the type from all but the first operand.  If the instruction has
+    // operands of different types (for example br), then all types are
+    // printed.
+ bool PrintAllTypes = false;
+ Type *TheType = Operand->getType();
+
+    // Select, Store, ShuffleVector and Return always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
+ || isa<ReturnInst>(I)) {
+ PrintAllTypes = true;
+ } else {
+ for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
+ Operand = I.getOperand(i);
+        // Note that Operand shouldn't be null, but the test helps make dump()
+        // more tolerant of malformed IR.
+ if (Operand && Operand->getType() != TheType) {
+ PrintAllTypes = true; // We have differing types! Print them all!
+ break;
+ }
+ }
+ }
+
+ if (!PrintAllTypes) {
+ Out << ' ';
+ TypePrinter.print(TheType, Out);
+ }
+
+ Out << ' ';
+ for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
+ if (i) Out << ", ";
+ writeOperand(I.getOperand(i), PrintAllTypes);
+ }
+ }
+
+ // Print post operand alignment for load/store.
+ if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) {
+ Out << ", align " << cast<LoadInst>(I).getAlignment();
+ } else if (isa<StoreInst>(I) && cast<StoreInst>(I).getAlignment()) {
+ Out << ", align " << cast<StoreInst>(I).getAlignment();
+ }
+
+ // Print Metadata info.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
+ }
+ }
+ printInfoComment(I);
+}
+
+static void WriteMDNodeComment(const MDNode *Node,
+ formatted_raw_ostream &Out) {
+ if (Node->getNumOperands() < 1)
+ return;
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
+ if (!CI) return;
+ APInt Val = CI->getValue();
+ APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
+ if (Val.ult(LLVMDebugVersion))
+ return;
+
+ Out.PadToColumn(50);
+ if (Tag == dwarf::DW_TAG_user_base)
+ Out << "; [ DW_TAG_user_base ]";
+ else if (Tag.isIntN(32)) {
+ if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
+ Out << "; [ " << TagName << " ]";
+ }
+}
+
+void AssemblyWriter::writeAllMDNodes() {
+ SmallVector<const MDNode *, 16> Nodes;
+ Nodes.resize(Machine.mdn_size());
+ for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
+ I != E; ++I)
+ Nodes[I->second] = cast<MDNode>(I->first);
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ Out << '!' << i << " = metadata ";
+ printMDNodeBody(Nodes[i]);
+ }
+}
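+
+// Each numbered node is emitted on its own line in the form
+// "!0 = metadata !{...}"; the body between the braces comes from
+// WriteMDNodeBodyInternal via printMDNodeBody (the node number shown is an
+// example).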
+
+void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ WriteMDNodeComment(Node, Out);
+ Out << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declarations
+//===----------------------------------------------------------------------===//
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this);
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this, AAW);
+ W.printModule(this);
+}
+
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
+void Type::print(raw_ostream &OS) const {
+ if (this == 0) {
+ OS << "<null Type>";
+ return;
+ }
+ TypePrinting TP;
+ TP.print(const_cast<Type*>(this), OS);
+
+ // If the type is a named struct type, print the body as well.
+ if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+ if (!STy->isAnonymous()) {
+ OS << " = type ";
+ TP.printStructBody(STy, OS);
+ }
+}
+
+void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ if (this == 0) {
+ ROS << "printing a <null> value\n";
+ return;
+ }
+ formatted_raw_ostream OS(ROS);
+ if (const Instruction *I = dyn_cast<Instruction>(this)) {
+ const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
+ W.printInstruction(*I);
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
+ SlotTracker SlotTable(BB->getParent());
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
+ W.printBasicBlock(BB);
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ SlotTracker SlotTable(GV->getParent());
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
+ W.printGlobal(V);
+ else if (const Function *F = dyn_cast<Function>(GV))
+ W.printFunction(F);
+ else
+ W.printAlias(cast<GlobalAlias>(GV));
+ } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ const Function *F = N->getFunction();
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ W.printMDNodeBody(N);
+ } else if (const Constant *C = dyn_cast<Constant>(this)) {
+ TypePrinting TypePrinter;
+ TypePrinter.print(C->getType(), OS);
+ OS << ' ';
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
+ } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
+ isa<Argument>(this)) {
+ WriteAsOperand(OS, this, true, 0);
+ } else {
+ // Otherwise we don't know what it is. Call the virtual function to
+ // allow a subclass to print itself.
+ printCustom(OS);
+ }
+}
+
+// Value::printCustom - subclasses should override this to implement printing.
+void Value::printCustom(raw_ostream &OS) const {
+ llvm_unreachable("Unknown value to print out!");
+}
+
+// Value::dump - allow easy printing of Values from the debugger.
+void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+
+// Type::dump - allow easy printing of Types from the debugger.
+void Type::dump() const { print(dbgs()); }
+
+// Module::dump() - Allow printing of Modules from the debugger.
+void Module::dump() const { print(dbgs(), 0); }
diff --git a/contrib/llvm/lib/VMCore/Attributes.cpp b/contrib/llvm/lib/VMCore/Attributes.cpp
new file mode 100644
index 000000000000..bf6efa1645a2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Attributes.cpp
@@ -0,0 +1,353 @@
+//===-- Attributes.cpp - Implement AttributesList -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AttributesList class and Attribute utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Attribute Function Definitions
+//===----------------------------------------------------------------------===//
+
+std::string Attribute::getAsString(Attributes Attrs) {
+ std::string Result;
+ if (Attrs & Attribute::ZExt)
+ Result += "zeroext ";
+ if (Attrs & Attribute::SExt)
+ Result += "signext ";
+ if (Attrs & Attribute::NoReturn)
+ Result += "noreturn ";
+ if (Attrs & Attribute::NoUnwind)
+ Result += "nounwind ";
+ if (Attrs & Attribute::UWTable)
+ Result += "uwtable ";
+ if (Attrs & Attribute::InReg)
+ Result += "inreg ";
+ if (Attrs & Attribute::NoAlias)
+ Result += "noalias ";
+ if (Attrs & Attribute::NoCapture)
+ Result += "nocapture ";
+ if (Attrs & Attribute::StructRet)
+ Result += "sret ";
+ if (Attrs & Attribute::ByVal)
+ Result += "byval ";
+ if (Attrs & Attribute::Nest)
+ Result += "nest ";
+ if (Attrs & Attribute::ReadNone)
+ Result += "readnone ";
+ if (Attrs & Attribute::ReadOnly)
+ Result += "readonly ";
+ if (Attrs & Attribute::OptimizeForSize)
+ Result += "optsize ";
+ if (Attrs & Attribute::NoInline)
+ Result += "noinline ";
+ if (Attrs & Attribute::InlineHint)
+ Result += "inlinehint ";
+ if (Attrs & Attribute::AlwaysInline)
+ Result += "alwaysinline ";
+ if (Attrs & Attribute::StackProtect)
+ Result += "ssp ";
+ if (Attrs & Attribute::StackProtectReq)
+ Result += "sspreq ";
+ if (Attrs & Attribute::NoRedZone)
+ Result += "noredzone ";
+ if (Attrs & Attribute::NoImplicitFloat)
+ Result += "noimplicitfloat ";
+ if (Attrs & Attribute::Naked)
+ Result += "naked ";
+ if (Attrs & Attribute::Hotpatch)
+ Result += "hotpatch ";
+ if (Attrs & Attribute::NonLazyBind)
+ Result += "nonlazybind ";
+ if (Attrs & Attribute::StackAlignment) {
+ Result += "alignstack(";
+ Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
+ Result += ") ";
+ }
+ if (Attrs & Attribute::Alignment) {
+ Result += "align ";
+ Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
+ Result += " ";
+ }
+ // Trim the trailing space.
+ assert(!Result.empty() && "Unknown attribute!");
+ Result.erase(Result.end()-1);
+ return Result;
+}
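+
+// A small usage sketch (the attribute combination is an arbitrary example):
+//
+//   std::string S = Attribute::getAsString(Attribute::NoUnwind |
+//                                          Attribute::ReadOnly);
+//   // S == "nounwind readonly"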
+
+Attributes Attribute::typeIncompatible(const Type *Ty) {
+ Attributes Incompatible = None;
+
+ if (!Ty->isIntegerTy())
+ // Attributes that only apply to integers.
+ Incompatible |= SExt | ZExt;
+
+ if (!Ty->isPointerTy())
+ // Attributes that only apply to pointers.
+ Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
+
+ return Incompatible;
+}
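+
+// For example (illustrative): for an integer type the incompatible set is the
+// pointer-only attributes (byval, nest, noalias, sret, nocapture), and for a
+// pointer type it is the integer-only attributes (signext, zeroext).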
+
+//===----------------------------------------------------------------------===//
+// AttributeListImpl Definition
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ class AttributeListImpl;
+}
+
+static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
+
+namespace llvm {
+static ManagedStatic<sys::SmartMutex<true> > ALMutex;
+
+class AttributeListImpl : public FoldingSetNode {
+ sys::cas_flag RefCount;
+
+ // AttributesList is uniqued; these should not be publicly available.
+ void operator=(const AttributeListImpl &); // Do not implement
+ AttributeListImpl(const AttributeListImpl &); // Do not implement
+ ~AttributeListImpl(); // Private implementation
+public:
+ SmallVector<AttributeWithIndex, 4> Attrs;
+
+ AttributeListImpl(const AttributeWithIndex *Attr, unsigned NumAttrs)
+ : Attrs(Attr, Attr+NumAttrs) {
+ RefCount = 0;
+ }
+
+ void AddRef() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ ++RefCount;
+ }
+ void DropRef() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ if (!AttributesLists.isConstructed())
+ return;
+ sys::cas_flag new_val = --RefCount;
+ if (new_val == 0)
+ delete this;
+ }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Attrs.data(), Attrs.size());
+ }
+ static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr,
+ unsigned NumAttrs) {
+ for (unsigned i = 0; i != NumAttrs; ++i)
+ ID.AddInteger(uint64_t(Attr[i].Attrs) << 32 | unsigned(Attr[i].Index));
+ }
+};
+}
+
+AttributeListImpl::~AttributeListImpl() {
+ // NOTE: Lock must be acquired by caller.
+ AttributesLists->RemoveNode(this);
+}
+
+
+AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) {
+ // If there are no attributes then return a null AttributesList pointer.
+ if (NumAttrs == 0)
+ return AttrListPtr();
+
+#ifndef NDEBUG
+ for (unsigned i = 0; i != NumAttrs; ++i) {
+ assert(Attrs[i].Attrs != Attribute::None &&
+ "Pointless attribute!");
+ assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
+ "Misordered AttributesList!");
+ }
+#endif
+
+ // Otherwise, build a key to look up the existing attributes.
+ FoldingSetNodeID ID;
+ AttributeListImpl::Profile(ID, Attrs, NumAttrs);
+ void *InsertPos;
+
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+
+ AttributeListImpl *PAL =
+ AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
+
+ // If we didn't find any existing attributes of the same shape then
+ // create a new one and insert it.
+ if (!PAL) {
+ PAL = new AttributeListImpl(Attrs, NumAttrs);
+ AttributesLists->InsertNode(PAL, InsertPos);
+ }
+
+ // Return the AttributesList that we found or created.
+ return AttrListPtr(PAL);
+}
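+
+// Illustrative sketch (hypothetical values): building a uniqued list through
+// AttrListPtr::get. Per the asserts above, entries must be sorted by index
+// and none may be Attribute::None; index ~0U denotes the function itself.
+//
+//   AttributeWithIndex AWI[2];
+//   AWI[0] = AttributeWithIndex::get(1, Attribute::NoAlias);    // param #1
+//   AWI[1] = AttributeWithIndex::get(~0U, Attribute::NoUnwind); // function
+//   AttrListPtr PAL = AttrListPtr::get(AWI, 2);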
+
+
+//===----------------------------------------------------------------------===//
+// AttrListPtr Method Implementations
+//===----------------------------------------------------------------------===//
+
+AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
+ if (LI) LI->AddRef();
+}
+
+AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
+ if (AttrList) AttrList->AddRef();
+}
+
+const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ if (AttrList == RHS.AttrList) return *this;
+ if (AttrList) AttrList->DropRef();
+ AttrList = RHS.AttrList;
+ if (AttrList) AttrList->AddRef();
+ return *this;
+}
+
+AttrListPtr::~AttrListPtr() {
+ if (AttrList) AttrList->DropRef();
+}
+
+/// getNumSlots - Return the number of slots used in this attribute list.
+/// This is the number of arguments that have an attribute set on them
+/// (including the function itself).
+unsigned AttrListPtr::getNumSlots() const {
+ return AttrList ? AttrList->Attrs.size() : 0;
+}
+
+/// getSlot - Return the AttributeWithIndex at the specified slot. This
+/// holds a number plus a set of attributes.
+const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
+ assert(AttrList && Slot < AttrList->Attrs.size() && "Slot # out of range!");
+ return AttrList->Attrs[Slot];
+}
+
+
+/// getAttributes - The attributes for the specified index are
+/// returned. Attributes for the result are denoted with Idx = 0.
+/// Function notes are denoted with Idx = ~0.
+Attributes AttrListPtr::getAttributes(unsigned Idx) const {
+ if (AttrList == 0) return Attribute::None;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
+ if (Attrs[i].Index == Idx)
+ return Attrs[i].Attrs;
+ return Attribute::None;
+}
+
+/// hasAttrSomewhere - Return true if the specified attribute is set for at
+/// least one parameter or for the return value.
+bool AttrListPtr::hasAttrSomewhere(Attributes Attr) const {
+ if (AttrList == 0) return false;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
+ if (Attrs[i].Attrs & Attr)
+ return true;
+ return false;
+}
+
+
+AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+ Attributes OldAttrs = getAttributes(Idx);
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't change a known alignment.
+ Attributes OldAlign = OldAttrs & Attribute::Alignment;
+ Attributes NewAlign = Attrs & Attribute::Alignment;
+ assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
+ "Attempt to change alignment!");
+#endif
+
+ Attributes NewAttrs = OldAttrs | Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ if (AttrList == 0)
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+ else {
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ if (i != e && OldAttrList[i].Index == Idx) {
+ Attrs |= OldAttrList[i].Attrs;
+ ++i;
+ }
+
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+ }
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
+
+AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!(Attrs & Attribute::Alignment) && "Attempt to exclude alignment!");
+#endif
+ if (AttrList == 0) return AttrListPtr();
+
+ Attributes OldAttrs = getAttributes(Idx);
+ Attributes NewAttrs = OldAttrs & ~Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
+ Attrs = OldAttrList[i].Attrs & ~Attrs;
+ ++i;
+ if (Attrs) // If any attributes left for this parameter, add them.
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
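+
+// Illustrative sketch (hypothetical names): attribute lists are immutable and
+// uniqued, so addAttr/removeAttr return a fresh AttrListPtr rather than
+// modifying the receiver.
+//
+//   AttrListPtr PAL2 = PAL.addAttr(1, Attribute::NoCapture);
+//   AttrListPtr PAL3 = PAL2.removeAttr(1, Attribute::NoCapture);
+//   // PAL itself is unchanged by either call.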
+
+void AttrListPtr::dump() const {
+ dbgs() << "PAL[ ";
+ for (unsigned i = 0; i < getNumSlots(); ++i) {
+ const AttributeWithIndex &PAWI = getSlot(i);
+ dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ }
+
+ dbgs() << "]\n";
+}
diff --git a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
new file mode 100644
index 000000000000..9e93ff370e25
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
@@ -0,0 +1,281 @@
+//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the auto-upgrade helper functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoUpgrade.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstring>
+using namespace llvm;
+
+
+static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+ assert(F && "Illegal to upgrade a non-existent Function.");
+
+ // Quickly eliminate it if it's not a candidate.
+ StringRef Name = F->getName();
+ if (Name.size() <= 8 || !Name.startswith("llvm."))
+ return false;
+ Name = Name.substr(5); // Strip off "llvm."
+
+ const FunctionType *FTy = F->getFunctionType();
+ Module *M = F->getParent();
+
+ switch (Name[0]) {
+ default: break;
+ case 'p':
+ // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
+ // which is an instruction / data cache identifier. The old version only
+ // implicitly accepted the data version.
+ if (Name == "prefetch") {
+ // Don't do anything if it has the correct number of arguments already
+ if (FTy->getNumParams() == 4)
+ break;
+
+ assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ std::string NameTmp = F->getName();
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
+ FTy->getReturnType(),
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
+ break;
+ case 'x': {
+ const char *NewFnName = NULL;
+ // This fixes the poorly named crc32 intrinsics.
+ if (Name == "x86.sse42.crc32.8")
+ NewFnName = "llvm.x86.sse42.crc32.32.8";
+ else if (Name == "x86.sse42.crc32.16")
+ NewFnName = "llvm.x86.sse42.crc32.32.16";
+ else if (Name == "x86.sse42.crc32.32")
+ NewFnName = "llvm.x86.sse42.crc32.32.32";
+ else if (Name == "x86.sse42.crc64.8")
+ NewFnName = "llvm.x86.sse42.crc32.64.8";
+ else if (Name == "x86.sse42.crc64.64")
+ NewFnName = "llvm.x86.sse42.crc32.64.64";
+
+ if (NewFnName) {
+ F->setName(NewFnName);
+ NewFn = F;
+ return true;
+ }
+
+ // Calls to these instructions are transformed into unaligned loads.
+ if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
+ Name == "x86.sse2.loadu.pd")
+ return true;
+
+ // Calls to these instructions are transformed into nontemporal stores.
+ if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
+ Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
+ return true;
+
+ break;
+ }
+ }
+
+ // This may not belong here. This function is effectively being overloaded
+ // to both detect an intrinsic which needs upgrading, and to provide the
+ // upgraded form of the intrinsic. We should perhaps have two separate
+ // functions for this.
+ return false;
+}
+
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+ NewFn = 0;
+ bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+
+ // Upgrade intrinsic attributes. This does not change the function.
+ if (NewFn)
+ F = NewFn;
+ if (unsigned id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+ return Upgraded;
+}
+
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ // Nothing to do yet.
+ return false;
+}
+
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
+// upgraded intrinsic. All argument and return casting must be provided in
+// order to seamlessly integrate with existing context.
+void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+ Function *F = CI->getCalledFunction();
+ LLVMContext &C = CI->getContext();
+ ImmutableCallSite CS(CI);
+
+ assert(F && "CallInst has no function associated with it.");
+
+ if (!NewFn) {
+ if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+ F->getName() == "llvm.x86.sse2.loadu.dq" ||
+ F->getName() == "llvm.x86.sse2.loadu.pd") {
+ // Convert to a native, unaligned load.
+ const Type *VecTy = CI->getType();
+ const Type *IntTy = IntegerType::get(C, 128);
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
+ PointerType::getUnqual(IntTy),
+ "cast");
+ LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
+ LI->setAlignment(1); // Unaligned load.
+ BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
+
+ // Fix up all the uses with our new load.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(BC);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
+ F->getName() == "llvm.x86.sse2.movnt.dq" ||
+ F->getName() == "llvm.x86.sse2.movnt.pd" ||
+ F->getName() == "llvm.x86.sse2.movnt.i") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Module *M = F->getParent();
+ SmallVector<Value *, 1> Elts;
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ StoreInst *SI = Builder.CreateStore(Arg1, BC);
+ SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setAlignment(16);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ } else {
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+ }
+ return;
+ }
+
+ switch (NewFn->getIntrinsicID()) {
+ case Intrinsic::prefetch: {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
+
+ // Add the extra "data cache" argument
+ Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ llvm::ConstantInt::get(I32Ty, 1) };
+ CallInst *NewCI = CallInst::Create(NewFn, Operands,
+ CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
+ }
+}
+
+// This tests each Function to determine if it needs upgrading. When we find
+// one we are interested in, we then upgrade all calls to reflect the new
+// function.
+void llvm::UpgradeCallsToIntrinsic(Function* F) {
+ assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
+
+ // Upgrade the function and check if it is a totally new function.
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(F, NewFn)) {
+ if (NewFn != F) {
+ // Replace all uses of the old function with the new one if necessary.
+ for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
+ UI != UE; ) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
+ }
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
+ }
+ }
+}
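+
+// Illustrative sketch (hypothetical caller): a reader or front end would
+// typically sweep every function in the module, letting this helper rewrite
+// the calls and drop the obsolete declaration. Note the iterator is advanced
+// before the call because the function may be erased.
+//
+//   for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+//     UpgradeCallsToIntrinsic(&*FI++);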
+
+/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
+/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
+/// strips that use.
+void llvm::CheckDebugInfoIntrinsics(Module *M) {
+ if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
+ while (!FuncStart->use_empty())
+ cast<CallInst>(FuncStart->use_back())->eraseFromParent();
+ FuncStart->eraseFromParent();
+ }
+
+ if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
+ while (!StopPoint->use_empty())
+ cast<CallInst>(StopPoint->use_back())->eraseFromParent();
+ StopPoint->eraseFromParent();
+ }
+
+ if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
+ while (!RegionStart->use_empty())
+ cast<CallInst>(RegionStart->use_back())->eraseFromParent();
+ RegionStart->eraseFromParent();
+ }
+
+ if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
+ while (!RegionEnd->use_empty())
+ cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
+ RegionEnd->eraseFromParent();
+ }
+
+ if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
+ if (!Declare->use_empty()) {
+ DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
+ if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+ !isa<MDNode>(DDI->getArgOperand(1))) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/BasicBlock.cpp b/contrib/llvm/lib/VMCore/BasicBlock.cpp
new file mode 100644
index 000000000000..70265c899d7e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/BasicBlock.cpp
@@ -0,0 +1,346 @@
+//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BasicBlock class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/LeakDetector.h"
+#include "SymbolTableListTraitsImpl.h"
+#include <algorithm>
+using namespace llvm;
+
+ValueSymbolTable *BasicBlock::getValueSymbolTable() {
+ if (Function *F = getParent())
+ return &F->getValueSymbolTable();
+ return 0;
+}
+
+LLVMContext &BasicBlock::getContext() const {
+ return getType()->getContext();
+}
+
+// Explicit instantiation of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
+
+
+BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
+ BasicBlock *InsertBefore)
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (InsertBefore) {
+ assert(NewParent &&
+ "Cannot insert block before another block with no function!");
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ } else if (NewParent) {
+ NewParent->getBasicBlockList().push_back(this);
+ }
+
+ setName(Name);
+}
+
+
+BasicBlock::~BasicBlock() {
+ // If the address of the block is taken and it is being deleted (e.g. because
+ // it is dead), this means that there is either a dangling constant expr
+ // hanging off the block, or an undefined use of the block (source code
+ // expecting the address of a label to keep the block alive even though there
+ // is no indirect branch). Handle these cases by zapping the BlockAddress
+ // nodes. There are no other possible uses at this point.
+ if (hasAddressTaken()) {
+ assert(!use_empty() && "There should be at least one blockaddress!");
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
+ while (!use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(use_back());
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+ }
+
+ assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ dropAllReferences();
+ InstList.clear();
+}
+
+void BasicBlock::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+
+ // Set Parent=parent, updating instruction symtab entries as appropriate.
+ InstList.setSymTabObject(&Parent, parent);
+
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void BasicBlock::removeFromParent() {
+ getParent()->getBasicBlockList().remove(this);
+}
+
+void BasicBlock::eraseFromParent() {
+ getParent()->getBasicBlockList().erase(this);
+}
+
+/// moveBefore - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right before MovePos.
+void BasicBlock::moveBefore(BasicBlock *MovePos) {
+ MovePos->getParent()->getBasicBlockList().splice(MovePos,
+ getParent()->getBasicBlockList(), this);
+}
+
+/// moveAfter - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right after MovePos.
+void BasicBlock::moveAfter(BasicBlock *MovePos) {
+ Function::iterator I = MovePos;
+ MovePos->getParent()->getBasicBlockList().splice(++I,
+ getParent()->getBasicBlockList(), this);
+}
+
+
+TerminatorInst *BasicBlock::getTerminator() {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+const TerminatorInst *BasicBlock::getTerminator() const {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+Instruction* BasicBlock::getFirstNonPHI() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbg() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ BasicBlock::iterator i = begin();
+ for (;; ++i) {
+ if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ continue;
+
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
+ if (!II)
+ break;
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ break;
+ }
+ return &*i;
+}
+
+void BasicBlock::dropAllReferences() {
+ for(iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// return the block, otherwise return a null pointer.
+BasicBlock *BasicBlock::getSinglePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *ThePred = *PI;
+ ++PI;
+ return (PI == E) ? ThePred : 0 /*multiple preds*/;
+}
+
+/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// return the block, otherwise return a null pointer.
+/// Note that unique predecessor doesn't mean single edge, there can be
+/// multiple edges from the unique predecessor to this block (for example
+/// a switch statement with multiple cases having the same destination).
+BasicBlock *BasicBlock::getUniquePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *PredBB = *PI;
+ ++PI;
+ for (;PI != E; ++PI) {
+ if (*PI != PredBB)
+ return 0;
+ // The same predecessor appears multiple times in the predecessor list.
+ // This is OK.
+ }
+ return PredBB;
+}
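+
+// Illustrative contrast (hypothetical blocks): if a switch in %A has two cases
+// that both branch to %B, then %B->getSinglePredecessor() returns null (there
+// are two incoming edges) while %B->getUniquePredecessor() still returns %A.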
+
+/// removePredecessor - This method is used to notify a BasicBlock that the
+/// specified Predecessor of the block is no longer able to reach it. This is
+/// not used to update the Predecessor list itself, but rather is used to
+/// update the PHI nodes that reside in the block. Note that this should be
+/// called while the predecessor still refers to this block.
+///
+void BasicBlock::removePredecessor(BasicBlock *Pred,
+ bool DontDeleteUselessPHIs) {
+ assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
+ find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+ "removePredecessor: BB is not a predecessor!");
+
+ if (InstList.empty()) return;
+ PHINode *APN = dyn_cast<PHINode>(&front());
+ if (!APN) return; // Quick exit.
+
+ // If there are exactly two predecessors, then we want to nuke the PHI nodes
+ // altogether. However, we cannot do this in the following case:
+ //
+ // Loop:
+ // %x = phi [X, Loop]
+ // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
+ // br Loop ;; %x2 does not dominate all uses
+ //
+ // This is because the PHI node input is actually taken from the predecessor
+ // basic block. The only case this can happen is with a self loop, so we
+ // check for this case explicitly now.
+ //
+ unsigned max_idx = APN->getNumIncomingValues();
+ assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
+ if (max_idx == 2) {
+ BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
+
+ // Disable PHI elimination!
+ if (this == Other) max_idx = 3;
+ }
+
+ // <= Two predecessors BEFORE I remove one?
+ if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ // Yup, loop through and nuke the PHI nodes
+ while (PHINode *PN = dyn_cast<PHINode>(&front())) {
+ // Remove the predecessor first.
+ PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+
+ // If the PHI _HAD_ two incoming values, replace it with its now *single* value
+ if (max_idx == 2) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ // We are left with an infinite loop with no entries: kill the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ getInstList().pop_front(); // Remove the PHI node
+ }
+
+ // If the PHI node already only had one entry, it got deleted by
+ // removeIncomingValue.
+ }
+ } else {
+ // Okay, now we know that we need to remove the entry for Pred from all
+ // PHI nodes. Iterate over each PHI node, fixing them up.
+ PHINode *PN;
+ for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
+ ++II;
+ PN->removeIncomingValue(Pred, false);
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ Value* PNV = 0;
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (PNV != PN) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+}
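+
+// Illustrative sketch (hypothetical names): a transform deleting the CFG edge
+// Pred -> BB is expected to fix up BB's PHI nodes first, while Pred still
+// refers to BB, and only then rewrite Pred's terminator.
+//
+//   BB->removePredecessor(Pred);
+//   // ... now replace or erase the branch from Pred to BB ...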
+
+
+/// splitBasicBlock - This splits a basic block into two at the specified
+/// instruction. Note that all instructions BEFORE the specified iterator stay
+/// as part of the original basic block, an unconditional branch is added to
+/// the new BB, and the rest of the instructions in the BB are moved to the new
+/// BB, including the old terminator. This invalidates the iterator.
+///
+/// Note that this only works on well formed basic blocks (must have a
+/// terminator), and 'I' must not be the end of instruction list (which would
+/// cause a degenerate basic block to be formed, having a terminator inside of
+/// the basic block).
+///
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
+ assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
+ .getNodePtrUnchecked();
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName,
+ getParent(), InsertBefore);
+
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), I, end());
+
+ // Add a branch instruction to the newly formed basic block.
+ BranchInst::Create(New, this);
+
+ // Now we must loop through all of the successors of the New block (which
+ // _were_ the successors of the 'this' block), and update any PHI nodes in
+ // successors. If there were PHI nodes in the successors, then they need to
+ // know that incoming branches will be from New, not from Old.
+ //
+ for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
+ // Loop over any phi nodes in the basic block, updating the BB field of
+ // incoming values...
+ BasicBlock *Successor = *I;
+ PHINode *PN;
+ for (BasicBlock::iterator II = Successor->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ int IDX = PN->getBasicBlockIndex(this);
+ while (IDX != -1) {
+ PN->setIncomingBlock((unsigned)IDX, New);
+ IDX = PN->getBasicBlockIndex(this);
+ }
+ }
+ }
+ return New;
+}
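+
+// Illustrative sketch (hypothetical values): splitting before instruction I
+// leaves everything before I in the original block, which then ends in an
+// unconditional branch to the new block holding I and the old terminator.
+//
+//   BasicBlock *Tail = BB->splitBasicBlock(I, "split");
+//   // BB:   ... br label %split
+//   // Tail: I ... <old terminator>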
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ TerminatorInst *TI = getTerminator();
+ if (!TI)
+ // Cope with being called on a BasicBlock that doesn't have a terminator
+ // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+ return;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
+ ++II) {
+ int i;
+ while ((i = PN->getBasicBlockIndex(this)) >= 0)
+ PN->setIncomingBlock(i, New);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.cpp b/contrib/llvm/lib/VMCore/ConstantFold.cpp
new file mode 100644
index 000000000000..323e2a280999
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantFold.cpp
@@ -0,0 +1,2344 @@
+//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements folding of constants for LLVM. This implements the
+// (internal) ConstantFold.h interface, which is used by the
+// ConstantExpr::get* methods to automatically fold constants when possible.
+//
+// The current constant folding is implemented in two pieces: the
+// pieces that don't need TargetData, and the pieces that do. This is to avoid
+// a dependence in VMCore on Target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantFold.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ConstantFold*Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+/// BitCastConstantVector - Convert the specified ConstantVector node to the
+/// specified vector type. At this point, we know that the elements of the
+/// input vector constant are all simple integer or FP values.
+static Constant *BitCastConstantVector(ConstantVector *CV,
+ const VectorType *DstTy) {
+
+ if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+ if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getNumOperands())
+ return 0;
+
+ // Check to verify that all elements of the input are simple.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!isa<ConstantInt>(CV->getOperand(i)) &&
+ !isa<ConstantFP>(CV->getOperand(i)))
+ return 0;
+ }
+
+ // Bitcast each element now.
+ std::vector<Constant*> Result;
+ const Type *DstEltTy = DstTy->getElementType();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
+ DstEltTy));
+ return ConstantVector::get(Result);
+}
+
+/// This function determines which opcode to use to fold two constant cast
+/// expressions together. It uses CastInst::isEliminableCastPair to determine
+/// the opcode. Consequently it's just a wrapper around that function.
+/// @brief Determine if it is valid to fold a cast of a cast
+static unsigned
+foldConstantCastPair(
+ unsigned opc, ///< opcode of the second cast constant expression
+ ConstantExpr *Op, ///< the first cast constant expression
+ const Type *DstTy ///< destination type of the second cast
+) {
+ assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
+ assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
+ assert(CastInst::isCast(opc) && "Invalid cast opcode");
+
+ // Get the types and opcodes for the two cast constant expressions.
+ const Type *SrcTy = Op->getOperand(0)->getType();
+ const Type *MidTy = Op->getType();
+ Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opc);
+
+ // Let CastInst::isEliminableCastPair do the heavy lifting.
+ return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
+ Type::getInt64Ty(DstTy->getContext()));
+}
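+
+// Illustrative example: folding (trunc (zext i8 %x to i64) to i16) on a
+// constant. isEliminableCastPair reduces the zext/trunc pair to a single
+// zext i8 -> i16, and that opcode is what this helper returns; a return of 0
+// means the pair cannot be collapsed.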
+
+static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
+ const Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
+ SmallVector<Value*, 8> IdxList;
+ Value *Zero =
+ Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
+ IdxList.push_back(Zero);
+ const Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Zero);
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(ElTy)) {
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(Zero);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ // This GEP is inbounds because all indices are zero.
+ return ConstantExpr::getInBoundsGetElementPtr(V, &IdxList[0],
+ IdxList.size());
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise it's an illegal cast).
+ if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ SrcTy = NULL;
+ // First, check for null. Undef is already handled.
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(DestTy);
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return BitCastConstantVector(CV, DestPTy);
+ }
+
+ // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
+ // This allows for other simplifications (although some of them
+ // can only be handled by Analysis/ConstantFolding.cpp).
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
+ return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
+ }
+
+ // Finally, implement bitcast folding now. The code below doesn't handle
+ // all bitcasts correctly.
+ if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+
+ // Handle integral constant input.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isIntegerTy())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPointTy())
+ return ConstantFP::get(DestTy->getContext(),
+ APFloat(CI->getValue(),
+ !DestTy->isPPC_FP128Ty()));
+
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input: FP -> Integral.
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ return ConstantInt::get(FP->getContext(),
+ FP->getValueAPF().bitcastToAPInt());
+
+ return 0;
+}
+
+
+/// ExtractConstantBytes - V is an integer constant which only has a subset of
+/// its bytes used. The bytes used are indicated by ByteStart (which is the
+/// first byte used, counting from the least significant byte) and ByteSize,
+/// which is the number of bytes used.
+///
+/// This function analyzes the specified constant to see if the specified byte
+/// range can be returned as a simplified constant. If so, the constant is
+/// returned, otherwise null is returned.
+///
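+/// For example (hypothetical value): given the i32 ConstantInt 0xAABBCCDD,
+/// ByteStart=1 and ByteSize=2 amount to a logical shift right by 8 followed
+/// by a truncation to 16 bits, yielding the i16 constant 0xBBCC.
+///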
+static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
+ unsigned ByteSize) {
+ assert(C->getType()->isIntegerTy() &&
+ (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
+ "Non-byte sized integer input");
+ unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
+ assert(ByteSize && "Must be accessing some piece");
+ assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
+ assert(ByteSize != CSize && "Should not extract everything");
+
+ // Constant Integers are simple.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ APInt V = CI->getValue();
+ if (ByteStart)
+ V = V.lshr(ByteStart*8);
+ V = V.trunc(ByteSize*8);
+ return ConstantInt::get(CI->getContext(), V);
+ }
+
+ // If the input is a constant expr, we might be able to recursively simplify.
+ // If not, we definitely can't do anything.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE == 0) return 0;
+
+ switch (CE->getOpcode()) {
+ default: return 0;
+ case Instruction::Or: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X | -1 -> -1.
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
+ if (RHSC->isAllOnesValue())
+ return RHSC;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getOr(LHS, RHS);
+ }
+ case Instruction::And: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X & 0 -> 0.
+ if (RHS->isNullValue())
+ return RHS;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getAnd(LHS, RHS);
+ }
+ case Instruction::LShr: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart >= CSize-ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart+ByteSize+ShAmt <= CSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::Shl: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart+ByteSize <= ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart >= ShAmt)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::ZExt: {
+ unsigned SrcBitSize =
+ cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
+
+ // If extracting something that is completely zero, return 0.
+ if (ByteStart*8 >= SrcBitSize)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+
+ // If exactly extracting the input, return it.
+ if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
+ return CE->getOperand(0);
+
+ // If extracting something completely in the input, and the input is a
+ // multiple of 8 bits, recurse.
+ if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
+
+ // Otherwise, if extracting a subset of the input, which is not a multiple of
+ // 8 bits, do a shift and trunc to get the bits.
+ if ((ByteStart+ByteSize)*8 < SrcBitSize) {
+ assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
+ Constant *Res = CE->getOperand(0);
+ if (ByteStart)
+ Res = ConstantExpr::getLShr(Res,
+ ConstantInt::get(Res->getType(), ByteStart*8));
+ return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
+ ByteSize*8));
+ }
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+ }
+}
+
+/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
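+/// For example (hypothetical type): for [8 x %T] this returns
+/// sizeof(%T) * 8 as an NUW multiply, exposing the factor of 8 to later
+/// folding instead of leaving an opaque sizeof([8 x %T]).
+///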
+static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
+ bool Folded) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has size zero.
+ if (NumElems == 0)
+ return ConstantExpr::getNullValue(DestTy);
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantInt::get(DestTy, NumElems);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // Pointer size doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular sizeof expression.
+ Constant *C = ConstantExpr::getSizeOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
+ bool Folded) {
+ // The alignment of an array is equal to the alignment of the
+ // array element. Note that this is not always true for vectors.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy,
+ false),
+ C, DestTy);
+ return C;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Packed structs always have an alignment of 1.
+ if (STy->isPacked())
+ return ConstantInt::get(DestTy, 1);
+
+ // Otherwise, struct alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has minimal alignment.
+ if (NumElems == 0)
+ return ConstantInt::get(DestTy, 1);
+ // Check for a struct with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
+ // Pointer alignment doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
+ 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular alignof expression.
+ Constant *C = ConstantExpr::getAlignOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof
+/// on Ty and FieldNo, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
+static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
+ const Type *DestTy,
+ bool Folded) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
+ DestTy, false),
+ FieldNo, DestTy);
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has no members.
+ if (NumElems == 0)
+ return 0;
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
+ false,
+ DestTy,
+ false),
+ FieldNo, DestTy);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular offsetof expression.
+ Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
+ const Type *DestTy) {
+ if (isa<UndefValue>(V)) {
+ // zext(undef) = 0, because the top bits will be zero.
+ // sext(undef) = 0, because the top bits will all be the same.
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (opc == Instruction::ZExt || opc == Instruction::SExt ||
+ opc == Instruction::UIToFP || opc == Instruction::SIToFP)
+ return Constant::getNullValue(DestTy);
+ return UndefValue::get(DestTy);
+ }
+
+ // No compile-time operations on this type yet.
+ if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
+ return 0;
+
+ if (V->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+
+ // If the cast operand is a constant expression, there are a few things we can
+ // do to try to simplify it.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->isCast()) {
+ // Try hard to fold cast of cast because they are often eliminable.
+ if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
+ return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
+ } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // If all of the indexes in the GEP are null values, there is no pointer
+ // adjustment going on. We might as well cast the source pointer.
+ bool isAllNull = true;
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue()) {
+ isAllNull = false;
+ break;
+ }
+ if (isAllNull)
+ // This is casting one pointer type to another, always BitCast
+ return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ }
+ }
+
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the case of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ if (DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ CV->getType()->getNumElements()) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(res);
+ }
+
+ // We actually have to do a cast now. Perform the cast according to the
+ // opcode specified.
+ switch (opc) {
+ default:
+ llvm_unreachable("Failed to cast constant expression");
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ bool ignored;
+ APFloat Val = FPC->getValueAPF();
+ Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad :
+ APFloat::Bogus,
+ APFloat::rmNearestTiesToEven, &ignored);
+ return ConstantFP::get(V->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ const APFloat &V = FPC->getValueAPF();
+ bool ignored;
+ uint64_t x[2];
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ APInt Val(DestBitWidth, 2, x);
+ return ConstantInt::get(FPC->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::IntToPtr: // always treated as unsigned
+ if (V->isNullValue()) // Is it an integral null value?
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+ return 0; // Other pointer types cannot be cast
+ case Instruction::PtrToInt: // always treated as unsigned
+ // Is it a null pointer value?
+ if (V->isNullValue())
+ return ConstantInt::get(DestTy, 0);
+ // If this is a sizeof-like expression, pull out multiplications by
+ // known factors to expose them to subsequent folding. If it's an
+ // alignof-like expression, factor out known factors.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ const Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (CE->getNumOperands() == 2) {
+ // Handle a sizeof-like expression.
+ Constant *Idx = CE->getOperand(1);
+ bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
+ if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
+ Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
+ DestTy, false),
+ Idx, DestTy);
+ return ConstantExpr::getMul(C, Idx);
+ }
+ } else if (CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ // Handle an alignof-like expression.
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
+ }
+ }
+ // Handle an offsetof-like expression.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
+ DestTy, false))
+ return C;
+ }
+ }
+ }
+ // Other pointer types cannot be cast
+ return 0;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt api = CI->getValue();
+ APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true);
+ (void)apf.convertFromAPInt(api,
+ opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(V->getContext(), apf);
+ }
+ return 0;
+ case Instruction::ZExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().zext(BitWidth));
+ }
+ return 0;
+ case Instruction::SExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().sext(BitWidth));
+ }
+ return 0;
+ case Instruction::Trunc: {
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().trunc(DestBitWidth));
+ }
+
+ // The input must be a constantexpr. See if we can simplify this based on
+ // the bytes we are demanding. Only do this if the source and dest are an
+ // even multiple of a byte.
+ if ((DestBitWidth & 7) == 0 &&
+ (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0)
+ if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
+ return Res;
+
+ return 0;
+ }
+ case Instruction::BitCast:
+ return FoldBitCast(V, DestTy);
+ }
+}
+
+Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
+ return CB->getZExtValue() ? V1 : V2;
+
+ // Check for zero aggregate and ConstantVector of zeros
+ if (Cond->isNullValue()) return V2;
+
+ if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
+
+ if (CondV->isAllOnesValue()) return V1;
+
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
+
+ if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
+ (CP2 || isa<ConstantAggregateZero>(V2))) {
+
+ // Find the element type of the returned vector
+ const Type *EltTy = VTy->getElementType();
+ unsigned NumElem = VTy->getNumElements();
+ std::vector<Constant*> Res(NumElem);
+
+ bool Valid = true;
+ for (unsigned i = 0; i < NumElem; ++i) {
+ ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (!c) {
+ Valid = false;
+ break;
+ }
+ Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res[i] = c->getZExtValue() ? C1 : C2;
+ }
+ // If we were able to build the vector, return it
+ if (Valid) return ConstantVector::get(Res);
+ }
+ }
+
+
+ if (isa<UndefValue>(Cond)) {
+ if (isa<UndefValue>(V1)) return V1;
+ return V2;
+ }
+ if (isa<UndefValue>(V1)) return V2;
+ if (isa<UndefValue>(V2)) return V1;
+ if (V1 == V2) return V1;
+
+ if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+ if (TrueVal->getOpcode() == Instruction::Select)
+ if (TrueVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+ }
+ if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+ if (FalseVal->getOpcode() == Instruction::Select)
+ if (FalseVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+ }
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
+ Constant *Idx) {
+ if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
+ return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
+ if (Val->isNullValue()) // ee(zero, x) -> zero
+ return Constant::getNullValue(
+ cast<VectorType>(Val->getType())->getElementType());
+
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ return CVal->getOperand(CIdx->getZExtValue());
+ } else if (isa<UndefValue>(Idx)) {
+ // ee({w,x,y,z}, undef) -> w (an arbitrary value).
+ return CVal->getOperand(0);
+ }
+ }
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
+ Constant *Elt,
+ Constant *Idx) {
+ ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx) return 0;
+ APInt idxVal = CIdx->getValue();
+ if (isa<UndefValue>(Val)) {
+ // Insertion of scalar constant into vector undef
+ // Optimize away insertion of undef
+ if (isa<UndefValue>(Elt))
+ return Val;
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : UndefValue::get(Elt->getType());
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (isa<ConstantAggregateZero>(Val)) {
+ // Insertion of scalar constant into vector aggregate zero
+ // Optimize away insertion of zero
+ if (Elt->isNullValue())
+ return Val;
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ // Insertion of scalar constant into vector constant
+ std::vector<Constant*> Ops;
+ Ops.reserve(CVal->getNumOperands());
+ for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
+ Constant *Op =
+ (idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
+ Ops.push_back(Op);
+ }
+ return ConstantVector::get(Ops);
+ }
+
+ return 0;
+}
+
+/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
+/// return the specified element value. Otherwise return null.
+static Constant *GetVectorElement(Constant *C, unsigned EltNo) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
+ return CV->getOperand(EltNo);
+
+ const Type *EltTy = cast<VectorType>(C->getType())->getElementType();
+ if (isa<ConstantAggregateZero>(C))
+ return Constant::getNullValue(EltTy);
+ if (isa<UndefValue>(C))
+ return UndefValue::get(EltTy);
+ return 0;
+}
+
+Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
+ Constant *V2,
+ Constant *Mask) {
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
+
+ unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
+ unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+
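+  // Each mask element selects from the concatenation of the two sources:
+  // indices [0, SrcNumElts) read V1, [SrcNumElts, 2*SrcNumElts) read V2, and
+  // anything beyond that range yields undef.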
+ // Loop over the shuffle mask, evaluating each element.
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ Constant *InElt = GetVectorElement(Mask, i);
+ if (InElt == 0) return 0;
+
+ if (isa<UndefValue>(InElt))
+ InElt = UndefValue::get(EltTy);
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
+ unsigned Elt = CI->getZExtValue();
+ if (Elt >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (Elt >= SrcNumElts)
+ InElt = GetVectorElement(V2, Elt - SrcNumElts);
+ else
+ InElt = GetVectorElement(V1, Elt);
+ if (InElt == 0) return 0;
+ } else {
+ // Unknown value.
+ return 0;
+ }
+ Result.push_back(InElt);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so return the entire value.
+ if (Idxs.empty())
+ return Agg;
+
+ if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
+ return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs));
+
+ if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
+ return
+ Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs));
+
+ // Otherwise recurse.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
+ return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
+ Idxs.slice(1));
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
+ return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
+ Idxs.slice(1));
+ ConstantVector *CV = cast<ConstantVector>(Agg);
+ return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
+ Idxs.slice(1));
+}
+
+Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
+ Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so replace the entire value.
+ if (Idxs.empty())
+ return Val;
+
+ if (isa<UndefValue>(Agg)) {
+ // Insertion of constant into aggregate undef
+ // Optimize away insertion of undef.
+ if (isa<UndefValue>(Val))
+ return Agg;
+
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion.
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Constant *Op =
+ (Idxs[0] == i) ?
+ ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
+ Val, Idxs.slice(1)) :
+ UndefValue::get(MemberTy);
+ Ops[i] = Op;
+ }
+
+ if (const StructType* ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+
+ if (isa<ConstantAggregateZero>(Agg)) {
+ // Insertion of constant into aggregate zero
+ // Optimize away insertion of zero.
+ if (Val->isNullValue())
+ return Agg;
+
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion.
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Constant *Op =
+ (Idxs[0] == i) ?
+ ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
+ Val, Idxs.slice(1)) :
+ Constant::getNullValue(MemberTy);
+ Ops[i] = Op;
+ }
+
+ if (const StructType *ST = dyn_cast<StructType>(AggTy))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+
+ if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
+ // Insertion of constant into aggregate constant.
+ std::vector<Constant*> Ops(Agg->getNumOperands());
+ for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
+ Constant *Op = cast<Constant>(Agg->getOperand(i));
+ if (Idxs[0] == i)
+ Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
+ Ops[i] = Op;
+ }
+
+ if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Ops);
+ return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
+ }
+
+ return 0;
+}
+
+
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
+ Constant *C1, Constant *C2) {
+ // No compile-time operations on this type yet.
+ if (C1->getType()->isPPC_FP128Ty())
+ return 0;
+
+ // Handle UndefValue up front.
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ switch (Opcode) {
+ case Instruction::Xor:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return Constant::getNullValue(C1->getType());
+ // Fallthrough
+ case Instruction::Add:
+ case Instruction::Sub:
+ return UndefValue::get(C1->getType());
+ case Instruction::And:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+ return C1;
+ return Constant::getNullValue(C1->getType()); // undef & X -> 0
+ case Instruction::Mul: {
+ ConstantInt *CI;
+ // X * undef -> undef if X is odd or undef
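+      // (An odd multiplier is invertible mod 2^n, so the product can take any
+      // value; an even multiplier forces known-zero low bits, so we fold to 0
+      // below instead.)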
+ if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+ ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(C1->getType());
+
+ // X * undef -> 0 otherwise
+ return Constant::getNullValue(C1->getType());
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ // undef / 1 -> undef
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+ if (CI2->isOne())
+ return C1;
+ // FALL THROUGH
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (!isa<UndefValue>(C2)) // undef / X -> 0
+ return Constant::getNullValue(C1->getType());
+ return C2; // X / undef -> undef
+ case Instruction::Or: // X | undef -> -1
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+ return C1;
+ return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
+ case Instruction::LShr:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef lshr undef -> undef
+ return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
+ // undef lshr X -> 0
+ case Instruction::AShr:
+ if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
+ return Constant::getAllOnesValue(C1->getType());
+ else if (isa<UndefValue>(C1))
+ return C1; // undef ashr undef -> undef
+ else
+ return C1; // X ashr undef --> X
+ case Instruction::Shl:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef shl undef -> undef
+ // undef << X -> 0 or X << undef -> 0
+ return Constant::getNullValue(C1->getType());
+ }
+ }
+
+ // Handle simplifications when the RHS is a constant int.
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ if (CI2->equalsInt(0)) return C1; // X + 0 == X
+ break;
+ case Instruction::Sub:
+ if (CI2->equalsInt(0)) return C1; // X - 0 == X
+ break;
+ case Instruction::Mul:
+ if (CI2->equalsInt(0)) return C2; // X * 0 == 0
+ if (CI2->equalsInt(1))
+ return C1; // X * 1 == X
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (CI2->equalsInt(1))
+ return C1; // X / 1 == X
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (CI2->equalsInt(1))
+ return Constant::getNullValue(CI2->getType()); // X % 1 == 0
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
+ break;
+ case Instruction::And:
+ if (CI2->isZero()) return C2; // X & 0 == 0
+ if (CI2->isAllOnesValue())
+ return C1; // X & -1 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
+ if (CE1->getOpcode() == Instruction::ZExt) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth =
+ CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
+ if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
+ return C1;
+ }
+
+ // If and'ing the address of a global with a constant, fold it.
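+      // e.g. if @G is known to be at least 4-byte aligned, then
+      // (ptrtoint @G) & 3 folds to 0 because the low address bits are clear.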
+ if (CE1->getOpcode() == Instruction::PtrToInt &&
+ isa<GlobalValue>(CE1->getOperand(0))) {
+ GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
+
+ // Functions are at least 4-byte aligned.
+ unsigned GVAlign = GV->getAlignment();
+ if (isa<Function>(GV))
+ GVAlign = std::max(GVAlign, 4U);
+
+ if (GVAlign > 1) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
+
+ // If checking bits we know are clear, return zero.
+ if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
+ return Constant::getNullValue(CI2->getType());
+ }
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (CI2->equalsInt(0)) return C1; // X | 0 == X
+ if (CI2->isAllOnesValue())
+ return C2; // X | -1 == -1
+ break;
+ case Instruction::Xor:
+ if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ switch (CE1->getOpcode()) {
+ default: break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // cmp pred ^ true -> cmp !pred
+ assert(CI2->equalsInt(1));
+ CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
+ pred = CmpInst::getInversePredicate(pred);
+ return ConstantExpr::getCompare(pred, CE1->getOperand(0),
+ CE1->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
+ return ConstantExpr::getLShr(C1, C2);
+ break;
+ }
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
+ }
+
+ // At this point we know neither constant is an UndefValue.
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ using namespace APIntOps;
+ const APInt &C1V = CI1->getValue();
+ const APInt &C2V = CI2->getValue();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return ConstantInt::get(CI1->getContext(), C1V + C2V);
+ case Instruction::Sub:
+ return ConstantInt::get(CI1->getContext(), C1V - C2V);
+ case Instruction::Mul:
+ return ConstantInt::get(CI1->getContext(), C1V * C2V);
+ case Instruction::UDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
+ case Instruction::SDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
+ case Instruction::URem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
+ case Instruction::SRem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
+ case Instruction::And:
+ return ConstantInt::get(CI1->getContext(), C1V & C2V);
+ case Instruction::Or:
+ return ConstantInt::get(CI1->getContext(), C1V | C2V);
+ case Instruction::Xor:
+ return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
+ case Instruction::Shl: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::LShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::AShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return C1;
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ APFloat C1V = CFP1->getValueAPF();
+ APFloat C2V = CFP2->getValueAPF();
+ APFloat C3V = C1V; // copy for modification
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FSub:
+ (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FMul:
+ (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FDiv:
+ (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FRem:
+ (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ }
+ }
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
+ if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
+ (CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
+ std::vector<Constant*> Res;
+ const Type* EltTy = VTy->getElementType();
+ Constant *C1 = 0;
+ Constant *C2 = 0;
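+      // Fold the operation element by element; a ConstantAggregateZero
+      // operand contributes a null element of EltTy at each position.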
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FAdd:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFAdd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Sub:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FSub:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFSub(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Mul:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FMul:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFMul(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::UDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getUDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::SDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FDiv:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFDiv(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::URem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getURem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::SRem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getSRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::FRem:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getFRem(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::And:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAnd(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Or:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getOr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Xor:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getXor(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::LShr:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getLShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::AShr:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getAShr(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ case Instruction::Shl:
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(ConstantExpr::getShl(C1, C2));
+ }
+ return ConstantVector::get(Res);
+ }
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // There are many possible foldings we could do here. We should probably
+ // at least fold add of a pointer with an integer into the appropriate
+ // getelementptr. This will improve alias analysis a bit.
+
+ // Given ((a + b) + c), if (b + c) folds to something interesting, return
+ // (a + (b + c)).
+ if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
+ Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
+ if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
+ return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
+ }
+ } else if (isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantFoldBinaryInstruction(Opcode, C2, C1);
+ }
+
+ // i1 can be simplified in many cases.
+ if (C1->getType()->isIntegerTy(1)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ return ConstantExpr::getXor(C1, C2);
+ case Instruction::Mul:
+ return ConstantExpr::getAnd(C1, C2);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // We can assume that C2 == 0. If it were one the result would be
+ // undefined because the shift value is as large as the bitwidth.
+ return C1;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return C1;
+ case Instruction::URem:
+ case Instruction::SRem:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return ConstantInt::getFalse(C1->getContext());
+ default:
+ break;
+ }
+ }
+
+ // We don't know how to fold this.
+ return 0;
+}
+
+/// isMaybeZeroSizedType - This type may be zero sized if it's an array or
+/// structure of zero sized types, or an opaque structure whose size is not
+/// known. The only leaf zero sized type is an empty structure.
+static bool isMaybeZeroSizedType(const Type *Ty) {
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->isOpaque()) return true; // Can't say.
+
+ // If all of elements have zero size, this does too.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
+ return true;
+
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return isMaybeZeroSizedType(ATy->getElementType());
+ }
+ return false;
+}
+
+/// IdxCompare - Compare the two constants as though they were getelementptr
+/// indices. This allows coercion of the types to be the same thing.
+///
+/// If the two constants are the "same" (after coercion), return 0. If the
+/// first is less than the second, return -1, if the second is less than the
+/// first, return 1. If the constants are not integral, return -2.
+///
+static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+ if (C1 == C2) return 0;
+
+ // Ok, we found a different index. If they are not ConstantInt, we can't do
+ // anything with them.
+ if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
+ return -2; // don't know!
+
+ // Ok, we have two differing integer indices. Sign extend them to be the same
+  // type. An i64 is always big enough, so we use it.
+ if (!C1->getType()->isIntegerTy(64))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+
+ if (!C2->getType()->isIntegerTy(64))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
+
+ if (C1 == C2) return 0; // They are equal
+
+ // If the type being indexed over is really just a zero sized type, there is
+ // no pointer difference being made here.
+ if (isMaybeZeroSizedType(ElTy))
+ return -2; // dunno.
+
+ // If they are really different, now that they are the same type, then we
+ // found a difference!
+ if (cast<ConstantInt>(C1)->getSExtValue() <
+ cast<ConstantInt>(C2)->getSExtValue())
+ return -1;
+ else
+ return 1;
+}
+
+/// evaluateFCmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
+/// If we can determine that the two constants have a particular relation to
+/// each other, we should return the corresponding FCmpInst predicate,
+/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
+/// ConstantFoldCompareInstruction.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider ConstantFP
+/// to be the simplest, and ConstantExprs to be the most complex.
+static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare values of different types!");
+
+ // No compile-time operations on this type yet.
+ if (V1->getType()->isPPC_FP128Ty())
+ return FCmpInst::BAD_FCMP_PREDICATE;
+
+ // Handle degenerate case quickly
+ if (V1 == V2) return FCmpInst::FCMP_OEQ;
+
+ if (!isa<ConstantExpr>(V1)) {
+ if (!isa<ConstantExpr>(V2)) {
+      // We distilled this down to a simple case; use the standard constant
+      // folder to check the few orderings we care about.
+ ConstantInt *R = 0;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OEQ;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OLT;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OGT;
+
+ // Nothing more we can do
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ }
+
+    // If the first operand is simple and the second is a ConstantExpr, swap operands.
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
+ return FCmpInst::getSwappedPredicate(SwappedRelation);
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ switch (CE1->getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ // We might be able to do something with these but we don't right now.
+ break;
+ default:
+ break;
+ }
+ }
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+/// evaluateICmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like integer comparisons, but should instead handle ConstantExprs
+/// and GlobalValues. If we can determine that the two constants have a
+/// particular relation to each other, we should return the corresponding ICmp
+/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider simple
+/// constants (like ConstantInt) to be the simplest, followed by
+/// GlobalValues, followed by ConstantExpr's (the most complex).
+///
+static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
+ bool isSigned) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare different types of values!");
+ if (V1 == V2) return ICmpInst::ICMP_EQ;
+
+ if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
+ !isa<BlockAddress>(V1)) {
+ if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
+ !isa<BlockAddress>(V2)) {
+ // We distilled this down to a simple case, use the standard constant
+ // folder.
+ ConstantInt *R = 0;
+ ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+
+ // If we couldn't figure it out, bail.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // If the first operand is simple, swap operands.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it's a
+ // ConstantPointerNull).
+ if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
+ if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ } else if (isa<BlockAddress>(V2)) {
+ return ICmpInst::ICMP_NE; // Globals never equal labels.
+ } else {
+ assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ // GlobalVals can never be null unless they have external weak linkage.
+ // We don't try to evaluate aliases here.
+ if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV))
+ return ICmpInst::ICMP_NE;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it is a
+ // ConstantPointerNull).
+ if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
+ // Block address in another function can't equal this one, but block
+ // addresses in the current function might be the same if blocks are
+ // empty.
+ if (BA2->getFunction() != BA->getFunction())
+ return ICmpInst::ICMP_NE;
+ } else {
+ // Block addresses aren't null, don't equal the address of globals.
+ assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
+ "Canonicalization guarantee!");
+ return ICmpInst::ICMP_NE;
+ }
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr, a global, block address, or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ Constant *CE1Op0 = CE1->getOperand(0);
+
+ switch (CE1->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ break; // We can't evaluate floating point casts or truncations.
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the cast is not actually changing bits, and the second operand is a
+ // null pointer, do the comparison with the pre-casted value.
+ if (V2->isNullValue() &&
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1Op0,
+ Constant::getNullValue(CE1Op0->getType()),
+ isSigned);
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ // Ok, since this is a getelementptr, we know that the constant has a
+ // pointer type. Check the various cases.
+ if (isa<ConstantPointerNull>(V2)) {
+ // If we are comparing a GEP to a null pointer, check to see if the base
+ // of the GEP equals the null pointer.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV->hasExternalWeakLinkage())
+          // Weak linkage GVals could be zero or not. We're comparing that
+          // to a null pointer, so it's greater-or-equal.
+ return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ else
+          // If it's not weak linkage, the GVal must have a non-zero address,
+          // so the result is greater-than.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else if (isa<ConstantPointerNull>(CE1Op0)) {
+ // If we are indexing from a null pointer, check to see if we have any
+ // non-zero indices.
+ for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
+ if (!CE1->getOperand(i)->isNullValue())
+ // Offsetting from null, must not be equal.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ // Only zero indexes from null, must still be zero.
+ return ICmpInst::ICMP_EQ;
+ }
+ // Otherwise, we can't really say if the first operand is null or not.
+ } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ if (isa<ConstantPointerNull>(CE1Op0)) {
+ if (GV2->hasExternalWeakLinkage())
+          // Weak linkage GVals could be zero or not. We're comparing it to
+          // a null pointer, so it's less-or-equal.
+ return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ else
+          // If it's not weak linkage, the GVal must have a non-zero address,
+          // so the result is less-than.
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV == GV2) {
+ // If this is a getelementptr of the same global, then it must be
+ // different. Because the types must match, the getelementptr could
+ // only have at most one index, and because we fold getelementptr's
+ // with a single zero index, it must be nonzero.
+ assert(CE1->getNumOperands() == 2 &&
+ !CE1->getOperand(1)->isNullValue() &&
+ "Surprising getelementptr!");
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else {
+ // If they are different globals, we don't know what the value is,
+ // but they can't be equal.
+ return ICmpInst::ICMP_NE;
+ }
+ }
+ } else {
+ ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ Constant *CE2Op0 = CE2->getOperand(0);
+
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ switch (CE2->getOpcode()) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // By far the most common case to handle is when the base pointers are
+ // obviously to the same or different globals.
+ if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
+ if (CE1Op0 != CE2Op0) // Don't know relative ordering, but not equal
+ return ICmpInst::ICMP_NE;
+ // Ok, we know that both getelementptr instructions are based on the
+ // same global. From this, we can precisely determine the relative
+ // ordering of the resultant pointers.
+ unsigned i = 1;
+
+ // The logic below assumes that the result of the comparison
+ // can be determined by finding the first index that differs.
+ // This doesn't work if there is over-indexing in any
+ // subsequent indices, so check for that case first.
+ if (!CE1->isGEPWithNoNotionalOverIndexing() ||
+ !CE2->isGEPWithNoNotionalOverIndexing())
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+
+ // Compare all of the operands the GEP's have in common.
+ gep_type_iterator GTI = gep_type_begin(CE1);
+ for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
+ ++i, ++GTI)
+ switch (IdxCompare(CE1->getOperand(i),
+ CE2->getOperand(i), GTI.getIndexedType())) {
+ case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
+ case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
+ case -2: return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Ok, we ran out of things they have in common. If any leftovers
+ // are non-zero then we have a difference, otherwise we are equal.
+ for (; i < CE1->getNumOperands(); ++i)
+ if (!CE1->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE1->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+
+ for (; i < CE2->getNumOperands(); ++i)
+ if (!CE2->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE2->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+ return ICmpInst::ICMP_EQ;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
+ Constant *C1, Constant *C2) {
+ const Type *ResultTy;
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
+ VT->getNumElements());
+ else
+ ResultTy = Type::getInt1Ty(C1->getContext());
+
+ // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
+ if (pred == FCmpInst::FCMP_FALSE)
+ return Constant::getNullValue(ResultTy);
+
+ if (pred == FCmpInst::FCMP_TRUE)
+ return Constant::getAllOnesValue(ResultTy);
+
+ // Handle some degenerate cases first
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Also, if both operands are undef, we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
+
+ // No compile-time operations on this type yet.
+ if (C1->getType()->isPPC_FP128Ty())
+ return 0;
+
+ // icmp eq/ne(null,GV) -> false/true
+ if (C1->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ // icmp eq/ne(GV,null) -> false/true
+ } else if (C2->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ }
+
+ // If the comparison is a comparison between two i1's, simplify it.
+ if (C1->getType()->isIntegerTy(1)) {
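+    // On i1, equality is the complement of xor: (a == b) <=> (a ^ ~b), and
+    // inequality is plain xor.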
+ switch(pred) {
+ case ICmpInst::ICMP_EQ:
+ if (isa<ConstantInt>(C2))
+ return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
+ return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
+ case ICmpInst::ICMP_NE:
+ return ConstantExpr::getXor(C1, C2);
+ default:
+ break;
+ }
+ }
+
+ if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
+ APInt V1 = cast<ConstantInt>(C1)->getValue();
+ APInt V2 = cast<ConstantInt>(C2)->getValue();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp Predicate"); return 0;
+ case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
+ case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
+ case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
+ case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
+ case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
+ case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
+ case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
+ case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
+ case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
+ case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
+ }
+ } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
+ APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
+ APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
+ APFloat::cmpResult R = C1V.compare(C2V);
+ switch (pred) {
+ default: llvm_unreachable("Invalid FCmp Predicate"); return 0;
+ case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
+ case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
+ case FCmpInst::FCMP_UNO:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
+ case FCmpInst::FCMP_ORD:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
+ case FCmpInst::FCMP_UEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_OEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UNE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
+ case FCmpInst::FCMP_ONE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OLT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_UGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OLE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UGE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OGE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual);
+ }
+ } else if (C1->getType()->isVectorTy()) {
+ SmallVector<Constant*, 16> C1Elts, C2Elts;
+ C1->getVectorElements(C1Elts);
+ C2->getVectorElements(C2Elts);
+ if (C1Elts.empty() || C2Elts.empty())
+ return 0;
+
+ // If we can constant fold the comparison of each element, constant fold
+ // the whole vector comparison.
+ SmallVector<Constant*, 4> ResElts;
+ // Compare the elements, producing an i1 result or constant expr.
+ for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
+
+ return ConstantVector::get(ResElts);
+ }
+
+ if (C1->getType()->isFloatingPointTy()) {
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateFCmpRelation(C1, C2)) {
+ default: llvm_unreachable("Unknown relation!");
+ case FCmpInst::FCMP_UNO:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_TRUE:
+ case FCmpInst::FCMP_FALSE:
+ case FCmpInst::BAD_FCMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case FCmpInst::FCMP_OEQ: // We know that C1 == C2
+ Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLT: // We know that C1 < C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
+ break;
+ case FCmpInst::FCMP_OGT: // We know that C1 > C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLE: // We know that C1 <= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 1;
+ break;
+    case FCmpInst::FCMP_OGE: // We know that C1 >= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
+ Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ } else {
+ // Evaluate the relation between the two constants, per the predicate.
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ default: llvm_unreachable("Unknown relational!");
+ case ICmpInst::BAD_ICMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case ICmpInst::ICMP_EQ: // We know the constants are equal!
+ // If we know the constants are equal, we can decide the result of this
+ // computation precisely.
+ Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (pred) {
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
+ Result = 1; break;
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (pred) {
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (pred) {
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (pred == ICmpInst::ICMP_UGT) Result = 0;
+ if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (pred == ICmpInst::ICMP_SGT) Result = 0;
+ if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (pred == ICmpInst::ICMP_ULT) Result = 0;
+ if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (pred == ICmpInst::ICMP_SLT) Result = 0;
+ if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_NE:
+ if (pred == ICmpInst::ICMP_EQ) Result = 0;
+ if (pred == ICmpInst::ICMP_NE) Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ // If the right hand side is a bitcast, try using its inverse to simplify
+ // it by moving it to the left hand side. We can't do this if it would turn
+    // a vector compare into a scalar compare or vice versa.
+ if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
+ Constant *CE2Op0 = CE2->getOperand(0);
+ if (CE2->getOpcode() == Instruction::BitCast &&
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+ Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
+ return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
+ }
+ }
+
+ // If the left hand side is an extension, try eliminating it.
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
+ Constant *CE1Op0 = CE1->getOperand(0);
+ Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
+ if (CE1Inverse == CE1Op0) {
+ // Check whether we can safely truncate the right hand side.
+ Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
+ if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
+ return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
+ }
+ }
+ }
+ }
+
+ if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
+ (C1->isNullValue() && !C2->isNullValue())) {
+ // If C2 is a constant expr and C1 isn't, flip them around and fold the
+ // other way if possible.
+ // Also, if C1 is null and C2 isn't, flip them around.
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantExpr::getICmp(pred, C2, C1);
+ }
+ }
+ return 0;
+}
+
+/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
+/// is "inbounds".
+template<typename IndexTy>
+static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
+ // No indices means nothing that could be out of bounds.
+ if (NumIdx == 0) return true;
+
+ // If the first index is zero, it's in bounds.
+ if (cast<Constant>(Idxs[0])->isNullValue()) return true;
+
+ // If the first index is one and all the rest are zero, it's in bounds,
+ // by the one-past-the-end rule.
+ if (!cast<ConstantInt>(Idxs[0])->isOne())
+ return false;
+ for (unsigned i = 1, e = NumIdx; i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue())
+ return false;
+ return true;
+}
+
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+ bool inBounds,
+ IndexTy const *Idxs,
+ unsigned NumIdx) {
+ if (NumIdx == 0) return C;
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
+ if ((NumIdx == 1 && Idx0->isNullValue()))
+ return C;
+
+ if (isa<UndefValue>(C)) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
+ }
+
+ if (C->isNullValue()) {
+ bool isNull = true;
+ for (unsigned i = 0, e = NumIdx; i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue()) {
+ isNull = false;
+ break;
+ }
+ if (isNull) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs,
+ Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return ConstantPointerNull::get(PointerType::get(Ty,
+ Ptr->getAddressSpace()));
+ }
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
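+    // e.g. gep(gep(P, a, b), c, d) becomes gep(P, a, b+c, d), which is valid
+    // when the innermost indexed type is an array (or when c is zero).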
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ const Type *LastTy = 0;
+ for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ I != E; ++I)
+ LastTy = *I;
+
+ if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ SmallVector<Value*, 16> NewIndices;
+ NewIndices.reserve(NumIdx + CE->getNumOperands());
+ for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
+ NewIndices.push_back(CE->getOperand(i));
+
+ // Add the last index of the source with the first index of the new GEP.
+ // Make sure to handle the case when they are actually different types.
+ Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
+ // Otherwise it must be an array.
+ if (!Idx0->isNullValue()) {
+ const Type *IdxTy = Combined->getType();
+ if (IdxTy != Idx0->getType()) {
+ const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
+ Combined = ConstantExpr::get(Instruction::Add, C1, C2);
+ } else {
+ Combined =
+ ConstantExpr::get(Instruction::Add, Idx0, Combined);
+ }
+ }
+
+ NewIndices.push_back(Combined);
+ NewIndices.append(Idxs+1, Idxs+NumIdx);
+ return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
+ ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size()) :
+ ConstantExpr::getGetElementPtr(CE->getOperand(0),
+ &NewIndices[0],
+ NewIndices.size());
+ }
+ }
+
+ // Implement folding of:
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ //
+ if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
+ if (const PointerType *SPT =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType()))
+ if (const ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
+ if (const ArrayType *CAT =
+ dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
+ if (CAT->getElementType() == SAT->getElementType())
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx) :
+ ConstantExpr::getGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx);
+ }
+ }
+
+ // Check to see if any array indices are not within the corresponding
+ // notional array bounds. If so, try to determine if they can be factored
+ // out into preceding dimensions.
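+  // e.g. an index of 7 into a [3 x i32] array is rewritten as an index of 1,
+  // with 2 (= 7 / 3) added to the index of the preceding dimension.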
+ bool Unknown = false;
+ SmallVector<Constant *, 8> NewIdxs;
+ const Type *Ty = C->getType();
+ const Type *Prev = 0;
+ for (unsigned i = 0; i != NumIdx;
+ Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ATy->getNumElements() <= INT64_MAX &&
+ ATy->getNumElements() != 0 &&
+ CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<SequentialType>(Prev)) {
+ // It's out of range, but we can factor it into the prior
+ // dimension.
+ NewIdxs.resize(NumIdx);
+ ConstantInt *Factor = ConstantInt::get(CI->getType(),
+ ATy->getNumElements());
+ NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
+
+ Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
+ Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+
+ // Before adding, extend both operands to i64 to avoid
+ // overflow trouble.
+ if (!PrevIdx->getType()->isIntegerTy(64))
+ PrevIdx = ConstantExpr::getSExt(PrevIdx,
+ Type::getInt64Ty(Div->getContext()));
+ if (!Div->getType()->isIntegerTy(64))
+ Div = ConstantExpr::getSExt(Div,
+ Type::getInt64Ty(Div->getContext()));
+
+ NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
+ } else {
+ // It's out of range, but the prior dimension is a struct
+ // so we can't do anything about it.
+ Unknown = true;
+ }
+ }
+ } else {
+ // We don't know if it's in range or not.
+ Unknown = true;
+ }
+ }
+
+ // If we did any factoring, start over with the adjusted indices.
+ if (!NewIdxs.empty()) {
+ for (unsigned i = 0; i != NumIdx; ++i)
+ if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
+ return inBounds ?
+ ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
+ NewIdxs.size()) :
+ ConstantExpr::getGetElementPtr(C, NewIdxs.data(), NewIdxs.size());
+ }
+
+ // If all indices are known integers and normalized, we can do a simple
+ // check for the "inbounds" property.
+ if (!Unknown && !inBounds &&
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs, NumIdx))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs, NumIdx);
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.h b/contrib/llvm/lib/VMCore/ConstantFold.h
new file mode 100644
index 000000000000..653a1c3f377d
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantFold.h
@@ -0,0 +1,56 @@
+//===-- ConstantFold.h - Internal Constant Folding Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the (internal) constant folding interfaces for LLVM. These
+// interfaces are used by the ConstantExpr::get* methods to automatically fold
+// constants when possible.
+//
+// These operators may return a null object if they don't know how to perform
+// the specified operation on the specified constant types.
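+//
+// For example, folding an add of two ConstantInts yields the summed
+// ConstantInt, while an add of two unrelated global addresses is not
+// simplified and null is returned.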
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CONSTANTFOLDING_H
+#define CONSTANTFOLDING_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+ class Value;
+ class Constant;
+ class Type;
+
+ // Constant fold various types of instruction...
+ Constant *ConstantFoldCastInstruction(
+ unsigned opcode, ///< The opcode of the cast
+ Constant *V, ///< The source constant
+ const Type *DestTy ///< The destination type
+ );
+ Constant *ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2);
+ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
+ Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt,
+ Constant *Idx);
+ Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
+ Constant *Mask);
+ Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
+ Constant *V2);
+ Constant *ConstantFoldCompareInstruction(unsigned short predicate,
+ Constant *C1, Constant *C2);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ Constant* const *Idxs, unsigned NumIdx);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ Value* const *Idxs, unsigned NumIdx);
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Constants.cpp b/contrib/llvm/lib/VMCore/Constants.cpp
new file mode 100644
index 000000000000..316c8846f94f
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Constants.cpp
@@ -0,0 +1,2173 @@
+//===-- Constants.cpp - Implement Constant nodes --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Constant* classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "LLVMContextImpl.h"
+#include "ConstantFold.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Class
+//===----------------------------------------------------------------------===//
+
+bool Constant::isNegativeZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && CFP->isNegative();
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+bool Constant::isNullValue() const {
+ // 0 is null.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isZero();
+
+ // +0.0 is null.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && !CFP->isNegative();
+
+ // ConstantAggregateZero is the null value for aggregates and
+ // ConstantPointerNull is the null value for pointers.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+}
+
+// Factory method to create a '0' constant of arbitrary type...
+Constant *Constant::getNullValue(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::FloatTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEsingle));
+ case Type::DoubleTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEdouble));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::x87DoubleExtended));
+ case Type::FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEquad));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APInt::getNullValue(128)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ assert(!"Cannot create a null constant of that type!");
+ return 0;
+ }
+}
+
+Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
+ const Type *ScalarTy = Ty->getScalarType();
+
+ // Create the base integer constant.
+ Constant *C = ConstantInt::get(Ty->getContext(), V);
+
+ // Convert an integer to a pointer, if necessary.
+ if (const PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ C = ConstantExpr::getIntToPtr(C, PTy);
+
+ // Broadcast a scalar to a vector, if necessary.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+Constant *Constant::getAllOnesValue(const Type *Ty) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(Ty->getContext(),
+ APInt::getAllOnesValue(ITy->getBitWidth()));
+
+ if (Ty->isFloatingPointTy()) {
+ APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+ !Ty->isPPC_FP128Ty());
+ return ConstantFP::get(Ty->getContext(), FL);
+ }
+
+ SmallVector<Constant*, 16> Elts;
+ const VectorType *VTy = cast<VectorType>(Ty);
+ Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
+ assert(Elts[0] && "Not a vector integer type!");
+ return cast<ConstantVector>(ConstantVector::get(Elts));
+}
+
+void Constant::destroyConstantImpl() {
+ // When a Constant is destroyed, there may be lingering
+ // references to the constant by other constants in the constant pool. These
+ // constants are implicitly dependent on the module that is being deleted,
+ // but they don't know that. Because we only find out when the CPV is
+ // deleted, we must now notify all of our users (that should only be
+ // Constants) that they are, in fact, invalid now and should be deleted.
+ //
+ while (!use_empty()) {
+ Value *V = use_back();
+#ifndef NDEBUG // Only in builds with assertions enabled...
+ if (!isa<Constant>(V)) {
+ dbgs() << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+ }
+#endif
+ assert(isa<Constant>(V) && "References remain to Constant being destroyed");
+ Constant *CV = cast<Constant>(V);
+ CV->destroyConstant();
+
+ // The constant should remove itself from our use list...
+ assert((use_empty() || use_back() != V) && "Constant not removed!");
+ }
+
+ // Value has no outstanding references; it is safe to delete it now...
+ delete this;
+}
+
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+ // The only thing that could possibly trap are constant exprs.
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
+ if (!CE) return false;
+
+ // ConstantExpr traps if any operands can trap.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (CE->getOperand(i)->canTrap())
+ return true;
+
+ // Otherwise, only specific operations can trap.
+ switch (CE->getOpcode()) {
+ default:
+ return false;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
+ if (!isa<ConstantInt>(CE->getOperand(1)) || CE->getOperand(1)->isNullValue())
+ return true;
+ return false;
+ }
+}
+
+/// isConstantUsed - Return true if the constant has users other than constant
+/// exprs and other dangling things.
+bool Constant::isConstantUsed() const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ const Constant *UC = dyn_cast<Constant>(*UI);
+ if (UC == 0 || isa<GlobalValue>(UC))
+ return true;
+
+ if (UC->isConstantUsed())
+ return true;
+ }
+ return false;
+}
+
+
+
+/// getRelocationInfo - This method classifies the entry according to
+/// whether or not it may generate a relocation entry. This must be
+/// conservative, so if it might codegen to a relocatable entry, it should say
+/// so. The return values are:
+///
+/// NoRelocation: This constant pool entry is guaranteed to never have a
+/// relocation applied to it (because it holds a simple constant like
+/// '4').
+/// LocalRelocation: This entry has relocations, but the entries are
+/// guaranteed to be resolvable by the static linker, so the dynamic
+/// linker will never see them.
+/// GlobalRelocations: This entry may have arbitrary relocations.
+///
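+/// For example, a GlobalValue with internal linkage or hidden visibility is
+/// classified as LocalRelocation, while a reference to an ordinary external
+/// global is classified as GlobalRelocations.
+///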
+/// FIXME: This really should not be in VMCore.
+Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return LocalRelocation; // Local to this file/library.
+ return GlobalRelocations; // Global reference.
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
+ return BA->getFunction()->getRelocationInfo();
+
+ // While raw uses of blockaddress need to be relocated, differences between
+ // two of them don't when they are for labels in the same function. This is a
+ // common idiom when creating a table for the indirect goto extension, so we
+ // handle it efficiently here.
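+ // For example, the difference of two blockaddress constants taken from the
+ // same function, roughly
+ // sub(ptrtoint(blockaddress(@f, %a)), ptrtoint(blockaddress(@f, %b))),
+ // resolves to a fixed offset and is classified as NoRelocation below.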
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (CE->getOpcode() == Instruction::Sub) {
+ ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
+ ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
+ if (LHS && RHS &&
+ LHS->getOpcode() == Instruction::PtrToInt &&
+ RHS->getOpcode() == Instruction::PtrToInt &&
+ isa<BlockAddress>(LHS->getOperand(0)) &&
+ isa<BlockAddress>(RHS->getOperand(0)) &&
+ cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return NoRelocation;
+ }
+
+ PossibleRelocationsTy Result = NoRelocation;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ Result = std::max(Result,
+ cast<Constant>(getOperand(i))->getRelocationInfo());
+
+ return Result;
+}
+
+
+/// getVectorElements - This method, which is only valid on constant of vector
+/// type, returns the elements of the vector in the specified smallvector.
+/// This handles breaking down a vector undef into undef elements, etc. For
+/// constant exprs and other cases we can't handle, we return an empty vector.
+void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
+ assert(getType()->isVectorTy() && "Not a vector constant!");
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
+ Elts.push_back(CV->getOperand(i));
+ return;
+ }
+
+ const VectorType *VT = cast<VectorType>(getType());
+ if (isa<ConstantAggregateZero>(this)) {
+ Elts.assign(VT->getNumElements(),
+ Constant::getNullValue(VT->getElementType()));
+ return;
+ }
+
+ if (isa<UndefValue>(this)) {
+ Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType()));
+ return;
+ }
+
+ // Unknown type, must be constant expr etc.
+}
+
+
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+ if (!User) return false; // Non-constant usage, cannot remove.
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
+ if (User == 0) {
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantInt
+//===----------------------------------------------------------------------===//
+
+ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
+ : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
+}
+
+ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheTrueVal)
+ pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+ return pImpl->TheTrueVal;
+}
+
+ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheFalseVal)
+ pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+ return pImpl->TheFalseVal;
+}
+
+Constant *ConstantInt::getTrue(const Type *Ty) {
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
+ return ConstantInt::getTrue(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "True must be vector of i1 or i1.");
+ SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
+ return ConstantVector::get(Splat);
+}
+
+Constant *ConstantInt::getFalse(const Type *Ty) {
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
+ return ConstantInt::getFalse(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "False must be vector of i1 or i1.");
+ SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
+ return ConstantVector::get(Splat);
+}
+
+
+// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
+// as the key is a DenseMapAPIntKeyInfo::KeyTy, which provides operator== and
+// operator!= so that the DenseMap never compares APInts of different widths;
+// doing so would violate an APInt class invariant and trigger an assertion.
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
+ // Get the corresponding integer type for the bit width of the value.
+ const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
+ // get an existing value or the insertion position
+ DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+ ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
+ if (!Slot) Slot = new ConstantInt(ITy, V);
+ return Slot;
+}
+
+Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(SmallVector<Constant*,
+ 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+ConstantInt* ConstantInt::get(const IntegerType* Ty, uint64_t V,
+ bool isSigned) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+ConstantInt* ConstantInt::getSigned(const IntegerType* Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
+ ConstantInt *C = get(Ty->getContext(), V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
+ uint8_t radix) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFP
+//===----------------------------------------------------------------------===//
+
+static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
+ if (Ty->isFloatTy())
+ return &APFloat::IEEEsingle;
+ if (Ty->isDoubleTy())
+ return &APFloat::IEEEdouble;
+ if (Ty->isX86_FP80Ty())
+ return &APFloat::x87DoubleExtended;
+ else if (Ty->isFP128Ty())
+ return &APFloat::IEEEquad;
+
+ assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
+ return &APFloat::PPCDoubleDouble;
+}
+
+/// get() - This returns a constant fp for the specified value in the
+/// specified type. This should only be used for simple constant values like
+/// 2.0/1.0 etc., which are known to be valid both as double and as the
+/// target format.
+Constant *ConstantFP::get(const Type* Ty, double V) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+
+Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::get(
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+
+ return C;
+}
+
+
+ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
+ LLVMContext &Context = Ty->getContext();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ apf.changeSign();
+ return get(Context, apf);
+}
+
+
+Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
+ if (PTy->getElementType()->isFloatingPointTy()) {
+ SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
+ getNegativeZero(PTy->getElementType()));
+ return ConstantVector::get(zeros);
+ }
+
+ if (Ty->isFloatingPointTy())
+ return getNegativeZero(Ty);
+
+ return Constant::getNullValue(Ty);
+}
+
+
+// ConstantFP accessors.
+ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
+ DenseMapAPFloatKeyInfo::KeyTy Key(V);
+
+ LLVMContextImpl* pImpl = Context.pImpl;
+
+ ConstantFP *&Slot = pImpl->FPConstants[Key];
+
+ if (!Slot) {
+ const Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::getFloatTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::getDoubleTy(Context);
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::getX86_FP80Ty(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::getFP128Ty(Context);
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(Context);
+ }
+ Slot = new ConstantFP(Ty, V);
+ }
+
+ return Slot;
+}
+
+ConstantFP *ConstantFP::getInfinity(const Type *Ty, bool Negative) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getInf(Semantics, Negative));
+}
+
+ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
+
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
+ return Val.bitwiseIsEqual(V);
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantXXX Classes
+//===----------------------------------------------------------------------===//
+
+
+ConstantArray::ConstantArray(const ArrayType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantArrayVal,
+ OperandTraits<ConstantArray>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant array");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert(C->getType() == T->getElementType() &&
+ "Initializer for array element doesn't match array element type!");
+ *OL = C;
+ }
+}
+
+Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ assert(V[i]->getType() == Ty->getElementType() &&
+ "Wrong type in array element initializer");
+ }
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ // If this is an all-zero array, return a ConstantAggregateZero object
+ if (!V.empty()) {
+ Constant *C = V[0];
+ if (!C->isNullValue())
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C)
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+ }
+
+ return ConstantAggregateZero::get(Ty);
+}
+
+/// ConstantArray::get(StringRef) - Return an array that is initialized to
+/// contain the specified string. If AddNull is true, a null terminator is
+/// appended so the result can be used as a C string; otherwise the array
+/// holds exactly the characters of Str with no terminator.
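+/// For example, get(Context, "hi", true) yields [3 x i8] c"hi\00", while
+/// get(Context, "hi", false) yields [2 x i8] c"hi".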
+///
+Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
+ bool AddNull) {
+ std::vector<Constant*> ElementVals;
+ ElementVals.reserve(Str.size() + size_t(AddNull));
+ for (unsigned i = 0; i < Str.size(); ++i)
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
+
+ // Add a null terminator to the string...
+ if (AddNull) {
+ ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
+ }
+
+ ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size());
+ return get(ATy, ElementVals);
+}
+
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements. The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+ ArrayRef<Constant*> V,
+ bool Packed) {
+ SmallVector<Type*, 16> EltTypes;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ EltTypes.push_back(V[i]->getType());
+
+ return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed) {
+ assert(!V.empty() &&
+ "ConstantStruct::getTypeForElements cannot be called on empty list");
+ return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
+ConstantStruct::ConstantStruct(const StructType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantStructVal,
+ OperandTraits<ConstantStruct>::op_end(this) - V.size(),
+ V.size()) {
+ assert((T->isOpaque() || V.size() == T->getNumElements()) &&
+ "Invalid initializer vector for constant structure");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) &&
+ "Initializer for struct element doesn't match struct element type!");
+ *OL = C;
+ }
+}
+
+// ConstantStruct accessors.
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (!V[i]->isNullValue())
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
+
+ assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+ "Incorrect # elements specified to ConstantStruct::get");
+ return ConstantAggregateZero::get(ST);
+}
+
+Constant* ConstantStruct::get(const StructType *T, ...) {
+ va_list ap;
+ SmallVector<Constant*, 8> Values;
+ va_start(ap, T);
+ while (Constant *Val = va_arg(ap, llvm::Constant*))
+ Values.push_back(Val);
+ va_end(ap);
+ return get(T, Values);
+}
+
+ConstantVector::ConstantVector(const VectorType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantVectorVal,
+ OperandTraits<ConstantVector>::op_end(this) - V.size(),
+ V.size()) {
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert(C->getType() == T->getElementType() &&
+ "Initializer for vector element doesn't match vector element type!");
+ *OL = C;
+ }
+}
+
+// ConstantVector accessors.
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ const VectorType *T = VectorType::get(V.front()->getType(), V.size());
+ LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+ // If this is an all-undef or all-zero vector, return a
+ // ConstantAggregateZero or UndefValue.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
+
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
+ }
+
+ if (isZero)
+ return ConstantAggregateZero::get(T);
+ if (isUndef)
+ return UndefValue::get(T);
+
+ return pImpl->VectorConstants.getOrCreate(T, V);
+}
+
+// Utility function for determining if a ConstantExpr is a CastOp or not. This
+// can't be inline because we don't want to #include Instruction.h into
+// Constant.h
+bool ConstantExpr::isCast() const {
+ return Instruction::isCast(getOpcode());
+}
+
+bool ConstantExpr::isCompare() const {
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
+}
+
+bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
+ if (getOpcode() != Instruction::GetElementPtr) return false;
+
+ gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
+ User::const_op_iterator OI = llvm::next(this->op_begin());
+
+ // Skip the first index, as it has no static limit.
+ ++GEPI;
+ ++OI;
+
+ // The remaining indices must be compile-time known integers within the
+ // bounds of the corresponding notional static array types.
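+ // For example, an index of 5 into a notional [4 x i32] dimension counts as
+ // over-indexing and makes this return false.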
+ for (; GEPI != E; ++GEPI, ++OI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI) return false;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+
+ // All the indices checked out.
+ return true;
+}
+
+bool ConstantExpr::hasIndices() const {
+ return getOpcode() == Instruction::ExtractValue ||
+ getOpcode() == Instruction::InsertValue;
+}
+
+ArrayRef<unsigned> ConstantExpr::getIndices() const {
+ if (const ExtractValueConstantExpr *EVCE =
+ dyn_cast<ExtractValueConstantExpr>(this))
+ return EVCE->Indices;
+
+ return cast<InsertValueConstantExpr>(this)->Indices;
+}
+
+unsigned ConstantExpr::getPredicate() const {
+ assert(isCompare());
+ return ((const CompareConstantExpr*)this)->predicate;
+}
+
+/// getWithOperandReplaced - Return a constant expression identical to this
+/// one, but with the specified operand set to the specified value.
+Constant *
+ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
+ assert(OpNo < getNumOperands() && "Operand num is out of range!");
+ assert(Op->getType() == getOperand(OpNo)->getType() &&
+ "Replacing operand with value of different type!");
+ if (getOperand(OpNo) == Op)
+ return const_cast<ConstantExpr*>(this);
+
+ Constant *Op0, *Op1, *Op2;
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Op, getType());
+ case Instruction::Select:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getSelect(Op0, Op1, Op2);
+ case Instruction::InsertElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ case Instruction::ExtractElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::getExtractElement(Op0, Op1);
+ case Instruction::ShuffleVector:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ case Instruction::GetElementPtr: {
+ SmallVector<Constant*, 8> Ops;
+ Ops.resize(getNumOperands()-1);
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+ Ops[i-1] = getOperand(i);
+ if (OpNo == 0)
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Op, &Ops[0], Ops.size()) :
+ ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
+ Ops[OpNo-1] = Op;
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(getOperand(0), &Ops[0],Ops.size()):
+ ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
+ }
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassOptionalData);
+ }
+}
+
+/// getWithOperands - This returns the current constant expression with the
+/// operands replaced with the specified values. The specified array must
+/// have the same number of operands as our current one.
+Constant *ConstantExpr::
+getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
+ assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
+ bool AnyChange = Ty != getType();
+ for (unsigned i = 0; i != Ops.size(); ++i)
+ AnyChange |= Ops[i] != getOperand(i);
+
+ if (!AnyChange) // No operands changed, return self.
+ return const_cast<ConstantExpr*>(this);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ return cast<GEPOperator>(this)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) :
+ ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// isValueValidForType implementations
+
+bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
+ return Val == 0 || Val == 1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ uint64_t Max = (1ll << NumBits) - 1;
+ return Val <= Max;
+}
+
+bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::getInt1Ty(Ty->getContext()))
+ return Val == 0 || Val == 1 || Val == -1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ int64_t Min = -(1ll << (NumBits-1));
+ int64_t Max = (1ll << (NumBits-1)) - 1;
+ return (Val >= Min && Val <= Max);
+}
+
+bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ switch (Ty->getTypeID()) {
+ default:
+ return false; // These can't be represented as floating point!
+
+ // FIXME rounding mode needs to be more flexible
+ case Type::FloatTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ return true;
+ Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::DoubleTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble)
+ return true;
+ Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::X86_FP80TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ case Type::FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::IEEEquad;
+ case Type::PPC_FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Factory Function Implementation
+
+ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ "Cannot create an aggregate zero of non-aggregate type!");
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->AggZeroConstants.getOrCreate(Ty, 0);
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantAggregateZero::destroyConstant() {
+ getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantArray::destroyConstant() {
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// isString - This method returns true if the array is an array of i8, and
+/// if the elements of the array are all ConstantInt's.
+bool ConstantArray::isString() const {
+ // Check the element type for i8...
+ if (!getType()->getElementType()->isIntegerTy(8))
+ return false;
+ // Check the elements to make sure they are all integers, not constant
+ // expressions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ return true;
+}
+
+/// isCString - This method returns true if the array is a string (see
+/// isString), ends in a null byte \\0, and does not contain any other null
+/// bytes except its terminator.
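+/// For example, c"foo\00" qualifies, while c"fo\00o" and c"foo" do not.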
+bool ConstantArray::isCString() const {
+ // Check the element type for i8...
+ if (!getType()->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Last element must be a null.
+ if (!getOperand(getNumOperands()-1)->isNullValue())
+ return false;
+ // Other elements must be non-null integers.
+ for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ if (getOperand(i)->isNullValue())
+ return false;
+ }
+ return true;
+}
+
+
+/// convertToString - Helper function for getAsString() and getAsCString().
+static std::string convertToString(const User *U, unsigned len) {
+ std::string Result;
+ Result.reserve(len);
+ for (unsigned i = 0; i != len; ++i)
+ Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue());
+ return Result;
+}
+
+/// getAsString - If this array is isString(), then this method converts the
+/// array to an std::string and returns it. Otherwise, it asserts out.
+///
+std::string ConstantArray::getAsString() const {
+ assert(isString() && "Not a string!");
+ return convertToString(this, getNumOperands());
+}
+
+
+/// getAsCString - If this array is isCString(), then this method converts the
+/// array (without the trailing null byte) to an std::string and returns it.
+/// Otherwise, it asserts out.
+///
+std::string ConstantArray::getAsCString() const {
+ assert(isCString() && "Not a string!");
+ return convertToString(this, getNumOperands() - 1);
+}
+
+
+//---- ConstantStruct::get() implementation...
+//
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantStruct::destroyConstant() {
+ getType()->getContext().pImpl->StructConstants.remove(this);
+ destroyConstantImpl();
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantVector::destroyConstant() {
+ getType()->getContext().pImpl->VectorConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// This function will return true iff every element in this vector constant
+/// is set to all ones.
+/// @returns true iff this constant's elements are all set to all ones.
+/// @brief Determine if the value is all ones.
+bool ConstantVector::isAllOnesValue() const {
+ // Check the first element.
+ const Constant *Elt = getOperand(0);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI || !CI->isAllOnesValue()) return false;
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return false;
+
+ return true;
+}
+
+/// getSplatValue - If this is a splat constant, where all of the
+/// elements have the same value, return that value. Otherwise return null.
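+/// For example, the vector <i32 7, i32 7, i32 7, i32 7> returns the i32 7
+/// constant, while <i32 7, i32 8, i32 7, i32 7> returns null.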
+Constant *ConstantVector::getSplatValue() const {
+ // Check the first element.
+ Constant *Elt = getOperand(0);
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return 0;
+ return Elt;
+}
+
+//---- ConstantPointerNull::get() implementation.
+//
+
+ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantPointerNull::destroyConstant() {
+ getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ destroyConstantImpl();
+}
+
+
+//---- UndefValue::get() implementation.
+//
+
+UndefValue *UndefValue::get(const Type *Ty) {
+ return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void UndefValue::destroyConstant() {
+ getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ destroyConstantImpl();
+}
+
+//---- BlockAddress::get() implementation.
+//
+
+BlockAddress *BlockAddress::get(BasicBlock *BB) {
+ assert(BB->getParent() != 0 && "Block must have a parent");
+ return get(BB->getParent(), BB);
+}
+
+BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
+ BlockAddress *&BA =
+ F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
+ if (BA == 0)
+ BA = new BlockAddress(F, BB);
+
+ assert(BA->getFunction() == F && "Basic block moved between functions");
+ return BA;
+}
+
+BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
+: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
+ &Op<0>(), 2) {
+ setOperand(0, F);
+ setOperand(1, BB);
+ BB->AdjustBlockAddressRefCount(1);
+}
+
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void BlockAddress::destroyConstant() {
+ getFunction()->getType()->getContext().pImpl
+ ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+ destroyConstantImpl();
+}
+
+void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ // This could be replacing either the Basic Block or the Function. In either
+ // case, we have to remove the map entry.
+ Function *NewF = getFunction();
+ BasicBlock *NewBB = getBasicBlock();
+
+ if (U == &Op<0>())
+ NewF = cast<Function>(To);
+ else
+ NewBB = cast<BasicBlock>(To);
+
+ // See if the 'new' entry already exists, if not, just update this in place
+ // and return early.
+ BlockAddress *&NewBA =
+ getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
+ if (NewBA == 0) {
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+
+ // Remove the old entry, this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
+ return;
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(NewBA != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(NewBA);
+
+ destroyConstant();
+}
+
+//---- ConstantExpr::get() implementations.
+//
+
+/// This is a utility function to handle folding of casts and lookup of the
+/// cast in the ExprConstants map. It is used by the various get* methods below.
+static inline Constant *getFoldedCast(
+ Instruction::CastOps opc, Constant *C, const Type *Ty) {
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+ // Fold a few common cases
+ if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ return FC;
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> argVec(1, C);
+ ExprMapKeyType Key(opc, argVec);
+
+ return pImpl->ExprConstants.getOrCreate(Ty, Key);
+}
+
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
+ Instruction::CastOps opc = Instruction::CastOps(oc);
+ assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(C && Ty && "Null arguments to getCast");
+ assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
+
+ switch (opc) {
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ break;
+ case Instruction::Trunc: return getTrunc(C, Ty);
+ case Instruction::ZExt: return getZExt(C, Ty);
+ case Instruction::SExt: return getSExt(C, Ty);
+ case Instruction::FPTrunc: return getFPTrunc(C, Ty);
+ case Instruction::FPExt: return getFPExtend(C, Ty);
+ case Instruction::UIToFP: return getUIToFP(C, Ty);
+ case Instruction::SIToFP: return getSIToFP(C, Ty);
+ case Instruction::FPToUI: return getFPToUI(C, Ty);
+ case Instruction::FPToSI: return getFPToSI(C, Ty);
+ case Instruction::PtrToInt: return getPtrToInt(C, Ty);
+ case Instruction::IntToPtr: return getIntToPtr(C, Ty);
+ case Instruction::BitCast: return getBitCast(C, Ty);
+ }
+ return 0;
+}
+
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getZExt(C, Ty);
+}
+
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getSExt(C, Ty);
+}
+
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getTrunc(C, Ty);
+}
+
+Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return getPtrToInt(S, Ty);
+ return getBitCast(S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
+ bool isSigned) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ Ty->isIntOrIntVectorTy() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ if (SrcBits == DstBits)
+ return C; // Avoid a useless cast
+ Instruction::CastOps opcode =
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
+ assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "SrcTy must be larger than DestTy for Trunc!");
+
+ return getFoldedCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for SExt!");
+
+ return getFoldedCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "ZExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "SrcTy must be smaller than DestTy for ZExt!");
+
+ return getFoldedCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point truncation!");
+ return getFoldedCast(Instruction::FPTrunc, C, Ty);
+}
+
+Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
+ "This is an illegal floating point extension!");
+ return getFoldedCast(Instruction::FPExt, C, Ty);
+}
+
+Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal uint to floating point cast!");
+ return getFoldedCast(Instruction::UIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal sint to floating point cast!");
+ return getFoldedCast(Instruction::SIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to uint cast!");
+ return getFoldedCast(Instruction::FPToUI, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to sint cast!");
+ return getFoldedCast(Instruction::FPToSI, C, Ty);
+}
+
+Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
+ assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
+ assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+}
+
+Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
+ assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
+ assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+}
+
+Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
+ "Invalid constantexpr bitcast!");
+
+ // It is common to ask for a bitcast of a value to its own type, handle this
+ // speedily.
+ if (C->getType() == DstTy) return C;
+
+ return getFoldedCast(Instruction::BitCast, C, DstTy);
+}
+
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
+ // Check the operands for consistency first.
+ assert(Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd &&
+ "Invalid opcode in binary constant expression");
+ assert(C1->getType() == C2->getType() &&
+ "Operand types in binary constant expression should match");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a shift operation on a non-integer type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases.
+
+ std::vector<Constant*> argVec(1, C1);
+ argVec.push_back(C2);
+ ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C1->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+}
+
+Constant *ConstantExpr::getSizeOf(const Type* Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
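+  // For example (illustrative), getSizeOf(i32) yields roughly
+  //   i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)
+  // which can fold down to the allocation size of i32 once target data is
+  // known.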
+ Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getAlignOf(const Type* Ty) {
+ // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
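+  // For example (illustrative), getAlignOf(double) yields roughly
+  //   i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1)
+  //                 to i64)
+  // i.e. the offset of the second field of { i1, double }, which matches the
+  // ABI alignment of double once target data is known.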
+ const Type *AligningTy =
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
+ Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+ return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
+ FieldNo));
+}
+
+Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+ // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
+ // Note that a non-inbounds gep is used, as null isn't within any object.
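+  // For example (illustrative), for the struct type { i8, i32 } and FieldNo 1
+  // this yields roughly
+  //   i64 ptrtoint (i32* getelementptr ({ i8, i32 }* null, i64 0, i32 1) to i64)
+  // which folds to the byte offset of field 1 once target data is known.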
+ Constant *GEPIdx[] = {
+ ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
+ FieldNo
+ };
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getCompare(unsigned short Predicate,
+ Constant *C1, Constant *C2) {
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return getFCmp(Predicate, C1, C2);
+
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return getICmp(Predicate, C1, C2);
+ }
+}
+
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
+ assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
+
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
+
+ std::vector<Constant*> argVec(3, C);
+ argVec[1] = V1;
+ argVec[2] = V2;
+ ExprMapKeyType Key(Instruction::Select, argVec);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+ unsigned NumIdx, bool InBounds) {
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
+ return FC; // Fold a few common cases.
+
+ // Get the result type of the getelementptr!
+ const Type *Ty =
+ GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ assert(Ty && "GEP indices invalid!");
+ unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+ Type *ReqTy = Ty->getPointerTo(AS);
+
+ assert(C->getType()->isPointerTy() &&
+ "Non-pointer type for constant GetElementPtr expression");
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(NumIdx+1);
+ ArgVec.push_back(C);
+ for (unsigned i = 0; i != NumIdx; ++i)
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ InBounds ? GEPOperator::IsInBounds : 0);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *
+ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *
+ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create extractelement operation on non-vector type!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Extractelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ return FC; // Fold a few common cases.
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Idx);
+ const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create insertelement operation on non-vector type!");
+ assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
+ && "Insertelement types must match!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Insertelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases.
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Elt);
+ ArgVec.push_back(Idx);
+ const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
+}
+
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask) {
+ assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector constant expr operands!");
+
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases.
+
+ unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ const Type *ShufTy = VectorType::get(EltTy, NElts);
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, V1);
+ ArgVec.push_back(V2);
+ ArgVec.push_back(Mask);
+ const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+
+ LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
+}
+
+Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs) == Val->getType() &&
+ "insertvalue indices invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant insertvalue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+ assert(FC && "insertvalue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getExtractValue(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create extractelement operation on non-first-class type!");
+
+ const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ (void)ReqTy;
+ assert(ReqTy && "extractvalue indices invalid!");
+
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NEG a nonintegral value!");
+ return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+ C, HasNUW, HasNSW);
+}
+
+Constant *ConstantExpr::getFNeg(Constant *C) {
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Cannot FNEG a non-floating-point value!");
+ return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+}
+
+Constant *ConstantExpr::getNot(Constant *C) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
+}
+
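+// For example (illustrative), getAdd(C1, C2, /*HasNUW=*/false, /*HasNSW=*/true)
+// produces the constant expression "add nsw" of C1 and C2 when the operands do
+// not fold away; the NUW/NSW arguments below are simply forwarded to get() as
+// OverflowingBinaryOperator flag bits.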
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Add, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
+
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Sub, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+ return get(Instruction::FSub, C1, C2);
+}
+
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Mul, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+ return get(Instruction::FMul, C1, C2);
+}
+
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::UDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::SDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
+ return get(Instruction::URem, C1, C2);
+}
+
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
+ return get(Instruction::And, C1, C2);
+}
+
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
+ return get(Instruction::Or, C1, C2);
+}
+
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Shl, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::LShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::AShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantExpr::destroyConstant() {
+ getType()->getContext().pImpl->ExprConstants.remove(this);
+ destroyConstantImpl();
+}
+
+const char *ConstantExpr::getOpcodeName() const {
+ return Instruction::getOpcodeName(getOpcode());
+}
+
+
+
+GetElementPtrConstantExpr::
+GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1), IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+
+//===----------------------------------------------------------------------===//
+// replaceUsesOfWithOnConstant implementations
+
+/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// 'From' to be uses of 'To'. This must update the uniquing data structures
+/// etc.
+///
+/// Note that we intentionally replace all uses of From with To here.  Consider
+/// a large array that uses 'From' 1000 times.  By handling all of those uses
+/// in one place, ConstantArray::replaceUsesOfWithOnConstant is invoked only
+/// once, and that single invocation handles all 1000 uses.  Handling them one
+/// at a time would work, but would be really slow because it would have to
+/// unique each updated array instance.
+///
+void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
+ Lookup.first.first = cast<ArrayType>(getType());
+ Lookup.second = this;
+
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement array.
+
+ // Fill values with the modified operands of the constant array. Also,
+ // compute whether this turns into an all-zeros array.
+ bool isAllZeros = false;
+ unsigned NumUpdated = 0;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ }
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this array type already.
+ bool Exists;
+ LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
+ pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant array, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->ArrayConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ if (NumUpdated == 1) {
+ unsigned OperandToUpdate = U - OperandList;
+ assert(getOperand(OperandToUpdate) == From &&
+ "ReplaceAllUsesWith broken!");
+ setOperand(OperandToUpdate, ToC);
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) == From)
+ setOperand(i, ToC);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
+
+ std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
+ Lookup.first.first = cast<StructType>(getType());
+ Lookup.second = this;
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement struct.
+
+
+ // Fill values with the modified operands of the constant struct. Also,
+ // compute whether this turns into an all-zeros struct.
+ bool isAllZeros = false;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+ Values[OperandToUpdate] = ToC;
+
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this struct type already.
+ bool Exists;
+ LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
+ pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant struct, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->StructConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value.
+ setOperand(OperandToUpdate, ToC);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+
+ std::vector<Constant*> Values;
+ Values.reserve(getNumOperands()); // Build replacement array...
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = cast<Constant>(To);
+ Values.push_back(Val);
+ }
+
+ Constant *Replacement = get(Values);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
+ Use *U) {
+ assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
+ Constant *To = cast<Constant>(ToV);
+
+ Constant *Replacement = 0;
+ if (getOpcode() == Instruction::GetElementPtr) {
+ SmallVector<Constant*, 8> Indices;
+ Constant *Pointer = getOperand(0);
+ Indices.reserve(getNumOperands()-1);
+ if (Pointer == From) Pointer = To;
+
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = To;
+ Indices.push_back(Val);
+ }
+ Replacement = ConstantExpr::getGetElementPtr(Pointer,
+ &Indices[0], Indices.size(),
+ cast<GEPOperator>(this)->isInBounds());
+ } else if (getOpcode() == Instruction::ExtractValue) {
+ Constant *Agg = getOperand(0);
+ if (Agg == From) Agg = To;
+
+ ArrayRef<unsigned> Indices = getIndices();
+ Replacement = ConstantExpr::getExtractValue(Agg, Indices);
+ } else if (getOpcode() == Instruction::InsertValue) {
+ Constant *Agg = getOperand(0);
+ Constant *Val = getOperand(1);
+ if (Agg == From) Agg = To;
+ if (Val == From) Val = To;
+
+ ArrayRef<unsigned> Indices = getIndices();
+ Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
+ } else if (isCast()) {
+    assert(getOperand(0) == From && "Cast only has one operand!");
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
+ } else if (getOpcode() == Instruction::Select) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getSelect(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ExtractElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::getExtractElement(C1, C2);
+ } else if (getOpcode() == Instruction::InsertElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+    Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getInsertElement(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ShuffleVector) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getShuffleVector(C1, C2, C3);
+ } else if (isCompare()) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (getOpcode() == Instruction::ICmp)
+ Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
+ else {
+ assert(getOpcode() == Instruction::FCmp);
+ Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
+ }
+ } else if (getNumOperands() == 2) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassOptionalData);
+ } else {
+ llvm_unreachable("Unknown ConstantExpr type!");
+ return;
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
diff --git a/contrib/llvm/lib/VMCore/ConstantsContext.h b/contrib/llvm/lib/VMCore/ConstantsContext.h
new file mode 100644
index 000000000000..bd134d9b892d
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ConstantsContext.h
@@ -0,0 +1,709 @@
+//===-- ConstantsContext.h - Constants-related Context Internals ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTSCONTEXT_H
+#define LLVM_CONSTANTSCONTEXT_H
+
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+namespace llvm {
+template<class ValType>
+struct ConstantTraits;
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class UnaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class BinaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ SubclassOptionalData = Flags;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class SelectConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class ExtractElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class InsertElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class ShuffleVectorConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class ExtractValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class InsertValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+  // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementptr constant exprs.
+class GetElementPtrConstantExpr : public ConstantExpr {
+ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ const std::vector<Constant*>&IdxList,
+ const Type *DestTy,
+ unsigned Flags) {
+ GetElementPtrConstantExpr *Result =
+ new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ Result->SubclassOptionalData = Flags;
+ return Result;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+// CompareConstantExpr - This class is private to Constants.cpp, and is used
+// behind the scenes to implement ICmp and FCmp constant expressions. This is
+// needed in order to store the predicate value for these instructions.
+struct CompareConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+template <>
+struct OperandTraits<UnaryConstantExpr> :
+ public FixedNumOperandTraits<UnaryConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> :
+ public FixedNumOperandTraits<BinaryConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> :
+ public FixedNumOperandTraits<SelectConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> :
+ public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> :
+ public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> :
+ public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> :
+ public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> :
+ public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> :
+ public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> :
+ public FixedNumOperandTraits<CompareConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+struct ExprMapKeyType {
+ ExprMapKeyType(unsigned opc,
+ ArrayRef<Constant*> ops,
+ unsigned short flags = 0,
+ unsigned short optionalflags = 0,
+ ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+ : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
+ operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
+ uint8_t opcode;
+ uint8_t subclassoptionaldata;
+ uint16_t subclassdata;
+ std::vector<Constant*> operands;
+ SmallVector<unsigned, 4> indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->subclassdata == that.subclassdata &&
+ this->subclassoptionaldata == that.subclassoptionaldata &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
+ bool operator<(const ExprMapKeyType & that) const {
+ if (this->opcode != that.opcode) return this->opcode < that.opcode;
+ if (this->operands != that.operands) return this->operands < that.operands;
+ if (this->subclassdata != that.subclassdata)
+ return this->subclassdata < that.subclassdata;
+ if (this->subclassoptionaldata != that.subclassoptionaldata)
+ return this->subclassoptionaldata < that.subclassoptionaldata;
+ if (this->indices != that.indices) return this->indices < that.indices;
+ return false;
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
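+// For example (illustrative), a non-foldable expression such as
+//   add nsw (i64 ptrtoint (i32* @G to i64), i64 8)
+// (where @G is some hypothetical global) is keyed roughly as { opcode = Add,
+// subclassoptionaldata = NoSignedWrap, subclassdata = 0,
+// operands = { the ptrtoint expr, i64 8 }, indices = {} }, so requesting the
+// same expression twice yields the same ConstantExpr object.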
+
+struct InlineAsmKeyType {
+ InlineAsmKeyType(StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack)
+ : asm_string(AsmString), constraints(Constraints),
+ has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {}
+ std::string asm_string;
+ std::string constraints;
+ bool has_side_effects;
+ bool is_align_stack;
+ bool operator==(const InlineAsmKeyType& that) const {
+ return this->asm_string == that.asm_string &&
+ this->constraints == that.constraints &&
+ this->has_side_effects == that.has_side_effects &&
+ this->is_align_stack == that.is_align_stack;
+ }
+ bool operator<(const InlineAsmKeyType& that) const {
+ if (this->asm_string != that.asm_string)
+ return this->asm_string < that.asm_string;
+ if (this->constraints != that.constraints)
+ return this->constraints < that.constraints;
+ if (this->has_side_effects != that.has_side_effects)
+ return this->has_side_effects < that.has_side_effects;
+ if (this->is_align_stack != that.is_align_stack)
+ return this->is_align_stack < that.is_align_stack;
+ return false;
+ }
+
+ bool operator!=(const InlineAsmKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+// The number of operands for each ConstantCreator::create method is
+// determined by the ConstantTraits template.
+//
+template<typename T, typename Alloc>
+struct ConstantTraits< std::vector<T, Alloc> > {
+ static unsigned uses(const std::vector<T, Alloc>& v) {
+ return v.size();
+ }
+};
+
+template<>
+struct ConstantTraits<Constant *> {
+ static unsigned uses(Constant * const & v) {
+ return 1;
+ }
+};
+
+// ConstantCreator - A class that is used to create constants by
+// ConstantUniqueMap*.  This class should be partially specialized if there is
+// something strange that needs to be done to interface to the ctor for the
+// constant.
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator {
+ static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass>
+struct ConstantKeyData {
+ typedef void ValType;
+ static ValType getValType(ConstantClass *C) {
+ llvm_unreachable("Unknown Constant type!");
+ }
+};
+
+template<>
+struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+ if ((V.opcode >= Instruction::BinaryOpsBegin &&
+ V.opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
+ V.subclassoptionaldata);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
+ V.subclassoptionaldata);
+ }
+
+    // The compare instructions are special: in addition to the opcode they
+    // carry a predicate, which the key keeps in its subclassdata field and
+    // which we pass straight through to CompareConstantExpr.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ llvm_unreachable("Invalid ConstantExpr!");
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantExpr> {
+ typedef ExprMapKeyType ValType;
+ static ValType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->getRawSubclassOptionalData(),
+ CE->hasIndices() ?
+ CE->getIndices() : ArrayRef<unsigned>());
+ }
+};
+
+// ConstantAggregateZero does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
+ static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
+ return new ConstantAggregateZero(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantVector> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantVector *CP) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CP->getNumOperands());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ Elements.push_back(CP->getOperand(i));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantAggregateZero> {
+ typedef char ValType;
+ static ValType getValType(ConstantAggregateZero *C) {
+ return 0;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantArray> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantArray *CA) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CA->getOperand(i)));
+ return Elements;
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantStruct> {
+ typedef std::vector<Constant*> ValType;
+ static ValType getValType(ConstantStruct *CS) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CS->getNumOperands());
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CS->getOperand(i)));
+ return Elements;
+ }
+};
+
+// ConstantPointerNull does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
+ static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
+ return new ConstantPointerNull(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<ConstantPointerNull> {
+ typedef char ValType;
+ static ValType getValType(ConstantPointerNull *C) {
+ return 0;
+ }
+};
+
+// UndefValue does not take an extra "value" argument.
+template<class ValType>
+struct ConstantCreator<UndefValue, Type, ValType> {
+ static UndefValue *create(const Type *Ty, const ValType &V) {
+ return new UndefValue(Ty);
+ }
+};
+
+template<>
+struct ConstantKeyData<UndefValue> {
+ typedef char ValType;
+ static ValType getValType(UndefValue *C) {
+ return 0;
+ }
+};
+
+template<>
+struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
+ static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) {
+ return new InlineAsm(Ty, Key.asm_string, Key.constraints,
+ Key.has_side_effects, Key.is_align_stack);
+ }
+};
+
+template<>
+struct ConstantKeyData<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ static ValType getValType(InlineAsm *Asm) {
+ return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
+ Asm->hasSideEffects(), Asm->isAlignStack());
+ }
+};
+
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+class ConstantUniqueMap {
+public:
+ typedef std::pair<const TypeClass*, ValType> MapKey;
+ typedef std::map<MapKey, ConstantClass *> MapTy;
+ typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->second;
+ }
+ }
+
+  /// InsertOrGetItem - Return an iterator for the specified element.
+  /// If the element exists in the map, the returned iterator points to the
+  /// entry and Exists is set to true.  If not, the iterator points to the
+  /// newly inserted entry and Exists is set to false.  Newly inserted entries
+  /// have I->second == 0, and should be filled in by the caller.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()),
+ ConstantKeyData<ConstantClass>::getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+
+ ConstantClass *Create(const TypeClass *Ty, ValRefType V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
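+  /// For example (illustrative), ConstantExpr::get in Constants.cpp calls
+  ///   pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+  /// after constant folding fails; requesting the same (Ty, V) pair again
+  /// returns the same ConstantClass object, which is what makes uniqued
+  /// constants pointer-comparable.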
+ ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->second;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ Map.erase(I);
+ }
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
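+  /// For example (illustrative), ConstantArray::replaceUsesOfWithOnConstant in
+  /// Constants.cpp calls this when it rewrites an existing array in place
+  /// instead of building a replacement constant.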
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void dump() const {
+    DEBUG(dbgs() << "Constants.cpp: ConstantUniqueMap\n");
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Core.cpp b/contrib/llvm/lib/VMCore/Core.cpp
new file mode 100644
index 000000000000..2a816e123a61
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Core.cpp
@@ -0,0 +1,2217 @@
+//===-- Core.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeCore(PassRegistry &Registry) {
+ initializeDominatorTreePass(Registry);
+ initializePrintModulePassPass(Registry);
+ initializePrintFunctionPassPass(Registry);
+ initializeVerifierPass(Registry);
+ initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+ initializeCore(*unwrap(R));
+}
+
+/*===-- Error handling ----------------------------------------------------===*/
+
+void LLVMDisposeMessage(char *Message) {
+ free(Message);
+}
+
+
+/*===-- Operations on contexts --------------------------------------------===*/
+
+LLVMContextRef LLVMContextCreate() {
+ return wrap(new LLVMContext());
+}
+
+LLVMContextRef LLVMGetGlobalContext() {
+ return wrap(&getGlobalContext());
+}
+
+void LLVMContextDispose(LLVMContextRef C) {
+ delete unwrap(C);
+}
+
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
+ return wrap(new Module(ModuleID, getGlobalContext()));
+}
+
+LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
+ LLVMContextRef C) {
+ return wrap(new Module(ModuleID, *unwrap(C)));
+}
+
+void LLVMDisposeModule(LLVMModuleRef M) {
+ delete unwrap(M);
+}
+
+/*--.. Data layout .........................................................--*/
+const char * LLVMGetDataLayout(LLVMModuleRef M) {
+ return unwrap(M)->getDataLayout().c_str();
+}
+
+void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) {
+  unwrap(M)->setDataLayout(DataLayoutStr);
+}
+
+/*--.. Target triple .......................................................--*/
+const char * LLVMGetTarget(LLVMModuleRef M) {
+ return unwrap(M)->getTargetTriple().c_str();
+}
+
+void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setTargetTriple(Triple);
+}
+
+void LLVMDumpModule(LLVMModuleRef M) {
+ unwrap(M)->dump();
+}
+
+/*--.. Operations on inline assembler ......................................--*/
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
+ unwrap(M)->setModuleInlineAsm(StringRef(Asm));
+}
+
+
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+ return wrap(&unwrap(M)->getContext());
+}
+
+
+/*===-- Operations on types -----------------------------------------------===*/
+
+/*--.. Operations on all types (mostly) ....................................--*/
+
+LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
+ switch (unwrap(Ty)->getTypeID()) {
+ default:
+ assert(false && "Unhandled TypeID.");
+ case Type::VoidTyID:
+ return LLVMVoidTypeKind;
+ case Type::FloatTyID:
+ return LLVMFloatTypeKind;
+ case Type::DoubleTyID:
+ return LLVMDoubleTypeKind;
+ case Type::X86_FP80TyID:
+ return LLVMX86_FP80TypeKind;
+ case Type::FP128TyID:
+ return LLVMFP128TypeKind;
+ case Type::PPC_FP128TyID:
+ return LLVMPPC_FP128TypeKind;
+ case Type::LabelTyID:
+ return LLVMLabelTypeKind;
+ case Type::MetadataTyID:
+ return LLVMMetadataTypeKind;
+ case Type::IntegerTyID:
+ return LLVMIntegerTypeKind;
+ case Type::FunctionTyID:
+ return LLVMFunctionTypeKind;
+ case Type::StructTyID:
+ return LLVMStructTypeKind;
+ case Type::ArrayTyID:
+ return LLVMArrayTypeKind;
+ case Type::PointerTyID:
+ return LLVMPointerTypeKind;
+ case Type::VectorTyID:
+ return LLVMVectorTypeKind;
+ case Type::X86_MMXTyID:
+ return LLVMX86_MMXTypeKind;
+ }
+}
+
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
+ return wrap(&unwrap(Ty)->getContext());
+}
+
+/*--.. Operations on integer types .........................................--*/
+
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
+ return wrap(IntegerType::get(*unwrap(C), NumBits));
+}
+
+LLVMTypeRef LLVMInt1Type(void) {
+ return LLVMInt1TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt8Type(void) {
+ return LLVMInt8TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt16Type(void) {
+ return LLVMInt16TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt32Type(void) {
+ return LLVMInt32TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt64Type(void) {
+ return LLVMInt64TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMIntType(unsigned NumBits) {
+ return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
+}
+
+unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
+ return unwrap<IntegerType>(IntegerTy)->getBitWidth();
+}
+
+/*--.. Operations on real types ............................................--*/
+
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
+}
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
+}
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
+
+LLVMTypeRef LLVMFloatType(void) {
+ return LLVMFloatTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMDoubleType(void) {
+ return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86FP80Type(void) {
+ return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFP128Type(void) {
+ return LLVMFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMPPCFP128Type(void) {
+ return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86MMXType(void) {
+ return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
+
+/*--.. Operations on function types ........................................--*/
+
+LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount,
+ LLVMBool IsVarArg) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
+}
+
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->isVarArg();
+}
+
+LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
+ return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
+}
+
+unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->getNumParams();
+}
+
+void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
+ FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
+ for (FunctionType::param_iterator I = Ty->param_begin(),
+ E = Ty->param_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
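+
+/* Illustrative usage sketch, not part of the bindings above: a client pairs
+ LLVMCountParamTypes with a caller-allocated array before calling
+ LLVMGetParamTypes. All names below are hypothetical. */
+static void ExampleQueryFunctionType(void) {
+ LLVMTypeRef Params[2] = { LLVMInt32Type(), LLVMInt32Type() };
+ LLVMTypeRef FnTy = LLVMFunctionType(LLVMInt32Type(), Params, 2, 0);
+ LLVMTypeRef Fetched[2];
+ LLVMGetParamTypes(FnTy, Fetched); /* fills Fetched[0..1] */
+ unsigned NumParams = LLVMCountParamTypes(FnTy); /* 2 */
+ LLVMTypeRef RetTy = LLVMGetReturnType(FnTy); /* i32 */
+ (void)NumParams; (void)RetTy;
+}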
+
+/*--.. Operations on struct types ..........................................--*/
+
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
+}
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount, Packed);
+}
+
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
+ return wrap(StructType::createNamed(*unwrap(C), Name));
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
+
+unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->getNumElements();
+}
+
+void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+ for (StructType::element_iterator I = Ty->element_begin(),
+ E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isPacked();
+}
+
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
+}
+
+/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
+
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
+ return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
+}
+
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(VectorType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
+ return wrap(unwrap<SequentialType>(Ty)->getElementType());
+}
+
+unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
+unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
+ return unwrap<PointerType>(PointerTy)->getAddressSpace();
+}
+
+unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
+ return unwrap<VectorType>(VectorTy)->getNumElements();
+}
+
+/*--.. Operations on other types ...........................................--*/
+
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getVoidTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getLabelTy(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMVoidType(void) {
+ return LLVMVoidTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMLabelType(void) {
+ return LLVMLabelTypeInContext(LLVMGetGlobalContext());
+}
+
+/*===-- Operations on values ----------------------------------------------===*/
+
+/*--.. Operations on all values ............................................--*/
+
+LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
+ return wrap(unwrap(Val)->getType());
+}
+
+const char *LLVMGetValueName(LLVMValueRef Val) {
+ return unwrap(Val)->getName().data();
+}
+
+void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
+ unwrap(Val)->setName(Name);
+}
+
+void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->dump();
+}
+
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
+ unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
+}
+
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
+/*--.. Conversion functions ................................................--*/
+
+#define LLVM_DEFINE_VALUE_CAST(name) \
+ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
+ return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
+ }
+
+LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
+
+/*--.. Operations on Uses ..................................................--*/
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ Value::use_iterator I = V->use_begin();
+ if (I == V->use_end())
+ return 0;
+ return wrap(&(I.getUse()));
+}
+
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
+}
+
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
+}
+
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
+}
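+
+/* Illustrative sketch (hypothetical helper, not from the sources): the Use
+ accessors above form a forward-iteration protocol over a value's uses. */
+static unsigned ExampleCountUses(LLVMValueRef Val) {
+ unsigned Count = 0;
+ for (LLVMUseRef U = LLVMGetFirstUse(Val); U; U = LLVMGetNextUse(U)) {
+ LLVMValueRef TheUser = LLVMGetUser(U); /* instruction or constant using Val */
+ (void)TheUser;
+ ++Count;
+ }
+ return Count;
+}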
+
+/*--.. Operations on Users .................................................--*/
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
+ return wrap(unwrap<User>(Val)->getOperand(Index));
+}
+
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ return unwrap<User>(Val)->getNumOperands();
+}
+
+/*--.. Operations on constants of any type .................................--*/
+
+LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
+ return wrap(Constant::getNullValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
+ return wrap(Constant::getAllOnesValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
+ return wrap(UndefValue::get(unwrap(Ty)));
+}
+
+LLVMBool LLVMIsConstant(LLVMValueRef Val) {
+ return isa<Constant>(unwrap(Val));
+}
+
+LLVMBool LLVMIsNull(LLVMValueRef Val) {
+ if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
+ return C->isNullValue();
+ return false;
+}
+
+LLVMBool LLVMIsUndef(LLVMValueRef Val) {
+ return isa<UndefValue>(unwrap(Val));
+}
+
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
+ return wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
+}
+
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C),
+ ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count)));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
+/*--.. Operations on scalar constants ......................................--*/
+
+LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
+ LLVMBool SignExtend) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
+}
+
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+ unsigned NumWords,
+ const uint64_t Words[]) {
+ IntegerType *Ty = unwrap<IntegerType>(IntTy);
+ return wrap(ConstantInt::get(Ty->getContext(),
+ APInt(Ty->getBitWidth(), NumWords, Words)));
+}
+
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
+ uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
+ Radix));
+}
+
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
+ unsigned SLen, uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
+ Radix));
+}
+
+LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
+ return wrap(ConstantFP::get(unwrap(RealTy), N));
+}
+
+LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
+}
+
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
+ unsigned SLen) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
+}
+
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
+}
+
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
+}
+
+/*--.. Operations on composite constants ...................................--*/
+
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length,
+ LLVMBool DontNullTerminate) {
+ /* Inverted the sense of AddNull because ', 0)' is a
+ better mnemonic for null termination than ', 1)'. */
+ return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
+}
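+
+/* Illustrative sketch of the inverted flag documented above: passing 0 for
+ DontNullTerminate requests an implicit trailing NUL. Hypothetical code,
+ not part of the bindings. */
+static void ExampleConstString(void) {
+ LLVMValueRef S = LLVMConstString("hi", 2, 0);
+ unsigned Len = LLVMGetArrayLength(LLVMTypeOf(S)); /* 3: 'h', 'i', '\0' */
+ (void)Len;
+}
+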
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, LLVMBool Packed) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ return wrap(ConstantStruct::getAnon(*unwrap(C),
+ ArrayRef<Constant*>(Elements, Count),
+ Packed != 0));
+}
+
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ LLVMBool DontNullTerminate) {
+ return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
+ DontNullTerminate);
+}
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length) {
+ ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
+}
+LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
+ LLVMBool Packed) {
+ return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
+ Packed);
+}
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ const StructType *Ty = cast<StructType>(unwrap(StructTy));
+
+ return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count)));
+}
+
+LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
+ return wrap(ConstantVector::get(ArrayRef<Constant*>(
+ unwrap<Constant>(ScalarConstantVals, Size), Size)));
+}
+/*--.. Constant expressions ................................................--*/
+
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
+ return (LLVMOpcode)unwrap<ConstantExpr>(ConstantVal)->getOpcode();
+}
+
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getICmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFCmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ unwrap<Constant>(ConstantIndices,
+ NumIndices),
+ NumIndices));
+}
+
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ Constant* Val = unwrap<Constant>(ConstantVal);
+ Constant** Idxs = unwrap<Constant>(ConstantIndices, NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, Idxs, NumIndices));
+}
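+
+/* Illustrative sketch: forming a constant pointer to element 1 of a
+ hypothetical global [4 x i32] array G. The first index steps through the
+ pointer, the second selects the element. */
+static LLVMValueRef ExampleConstGEP(LLVMValueRef G) {
+ LLVMValueRef Idx[2] = { LLVMConstInt(LLVMInt32Type(), 0, 0),
+ LLVMConstInt(LLVMInt32Type(), 1, 0) };
+ return LLVMConstInBoundsGEP(G, Idx, 2);
+}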
+
+LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ LLVMBool isSigned) {
+ return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType), isSigned));
+}
+
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
+ LLVMValueRef ConstantIfTrue,
+ LLVMValueRef ConstantIfFalse) {
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ unwrap<Constant>(ConstantIfTrue),
+ unwrap<Constant>(ConstantIfFalse)));
+}
+
+LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
+ LLVMValueRef ElementValueConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(ElementValueConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
+ LLVMValueRef VectorBConstant,
+ LLVMValueRef MaskConstant) {
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ unwrap<Constant>(VectorBConstant),
+ unwrap<Constant>(MaskConstant)));
+}
+
+LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
+ unsigned NumIdx) {
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
+}
+
+LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
+ LLVMValueRef ElementValueConstant,
+ unsigned *IdxList, unsigned NumIdx) {
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ unwrap<Constant>(ElementValueConstant),
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
+}
+
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints,
+ LLVMBool HasSideEffects,
+ LLVMBool IsAlignStack) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+ Constraints, HasSideEffects, IsAlignStack));
+}
+
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
+/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
+
+LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getParent());
+}
+
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->isDeclaration();
+}
+
+LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
+ switch (unwrap<GlobalValue>(Global)->getLinkage()) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case GlobalValue::ExternalLinkage:
+ return LLVMExternalLinkage;
+ case GlobalValue::AvailableExternallyLinkage:
+ return LLVMAvailableExternallyLinkage;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return LLVMLinkOnceAnyLinkage;
+ case GlobalValue::LinkOnceODRLinkage:
+ return LLVMLinkOnceODRLinkage;
+ case GlobalValue::WeakAnyLinkage:
+ return LLVMWeakAnyLinkage;
+ case GlobalValue::WeakODRLinkage:
+ return LLVMWeakODRLinkage;
+ case GlobalValue::AppendingLinkage:
+ return LLVMAppendingLinkage;
+ case GlobalValue::InternalLinkage:
+ return LLVMInternalLinkage;
+ case GlobalValue::PrivateLinkage:
+ return LLVMPrivateLinkage;
+ case GlobalValue::LinkerPrivateLinkage:
+ return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ return LLVMLinkerPrivateWeakDefAutoLinkage;
+ case GlobalValue::DLLImportLinkage:
+ return LLVMDLLImportLinkage;
+ case GlobalValue::DLLExportLinkage:
+ return LLVMDLLExportLinkage;
+ case GlobalValue::ExternalWeakLinkage:
+ return LLVMExternalWeakLinkage;
+ case GlobalValue::CommonLinkage:
+ return LLVMCommonLinkage;
+ }
+
+ // Should never get here.
+ return static_cast<LLVMLinkage>(0);
+}
+
+void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
+ GlobalValue *GV = unwrap<GlobalValue>(Global);
+
+ switch (Linkage) {
+ default:
+ assert(false && "Unhandled Linkage Type.");
+ case LLVMExternalLinkage:
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ break;
+ case LLVMAvailableExternallyLinkage:
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ break;
+ case LLVMLinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ break;
+ case LLVMLinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ break;
+ case LLVMWeakAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case LLVMWeakODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ case LLVMAppendingLinkage:
+ GV->setLinkage(GlobalValue::AppendingLinkage);
+ break;
+ case LLVMInternalLinkage:
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ break;
+ case LLVMPrivateLinkage:
+ GV->setLinkage(GlobalValue::PrivateLinkage);
+ break;
+ case LLVMLinkerPrivateLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
+ break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
+ case LLVMLinkerPrivateWeakDefAutoLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage);
+ break;
+ case LLVMDLLImportLinkage:
+ GV->setLinkage(GlobalValue::DLLImportLinkage);
+ break;
+ case LLVMDLLExportLinkage:
+ GV->setLinkage(GlobalValue::DLLExportLinkage);
+ break;
+ case LLVMExternalWeakLinkage:
+ GV->setLinkage(GlobalValue::ExternalWeakLinkage);
+ break;
+ case LLVMGhostLinkage:
+ DEBUG(errs()
+ << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported.");
+ break;
+ case LLVMCommonLinkage:
+ GV->setLinkage(GlobalValue::CommonLinkage);
+ break;
+ }
+}
+
+const char *LLVMGetSection(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getSection().c_str();
+}
+
+void LLVMSetSection(LLVMValueRef Global, const char *Section) {
+ unwrap<GlobalValue>(Global)->setSection(Section);
+}
+
+LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
+ return static_cast<LLVMVisibility>(
+ unwrap<GlobalValue>(Global)->getVisibility());
+}
+
+void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
+ unwrap<GlobalValue>(Global)
+ ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
+}
+
+unsigned LLVMGetAlignment(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getAlignment();
+}
+
+void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
+ unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+}
+
+/*--.. Operations on global variables ......................................--*/
+
+LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name));
+}
+
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ false, AddressSpace));
+}
+
+LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getNamedGlobal(Name));
+}
+
+LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_begin();
+ if (I == Mod->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_end();
+ if (I == Mod->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (++I == GV->getParent()->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (I == GV->getParent()->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
+ unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
+}
+
+LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
+ GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
+ if (!GV->hasInitializer())
+ return 0;
+ return wrap(GV->getInitializer());
+}
+
+void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
+ unwrap<GlobalVariable>(GlobalVar)
+ ->setInitializer(unwrap<Constant>(ConstantVal));
+}
+
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
+}
+
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
+ unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
+}
+
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isConstant();
+}
+
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
+ unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
+}
+
+/*--.. Operations on aliases ......................................--*/
+
+LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap(M)));
+}
+
+/*--.. Operations on functions .............................................--*/
+
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy) {
+ return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
+ GlobalValue::ExternalLinkage, Name, unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getFunction(Name));
+}
+
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->begin();
+ if (I == Mod->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->end();
+ if (I == Mod->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (++I == Func->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (I == Func->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
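+
+/* Illustrative sketch (hypothetical helper): the First/Next pairs above give
+ forward iteration over a module's functions. */
+static void ExampleForEachFunction(LLVMModuleRef M) {
+ LLVMValueRef Fn;
+ for (Fn = LLVMGetFirstFunction(M); Fn; Fn = LLVMGetNextFunction(Fn)) {
+ const char *Name = LLVMGetValueName(Fn); /* "" for unnamed functions */
+ (void)Name;
+ }
+}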
+
+void LLVMDeleteFunction(LLVMValueRef Fn) {
+ unwrap<Function>(Fn)->eraseFromParent();
+}
+
+unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
+ if (Function *F = dyn_cast<Function>(unwrap(Fn)))
+ return F->getIntrinsicID();
+ return 0;
+}
+
+unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
+ return unwrap<Function>(Fn)->getCallingConv();
+}
+
+void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
+ return unwrap<Function>(Fn)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
+}
+
+const char *LLVMGetGC(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ return F->hasGC()? F->getGC() : 0;
+}
+
+void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
+ Function *F = unwrap<Function>(Fn);
+ if (GC)
+ F->setGC(GC);
+ else
+ F->clearGC();
+}
+
+void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
+ Func->setAttributes(PALnew);
+}
+
+void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
+ Func->setAttributes(PALnew);
+}
+
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ Attributes attr = PAL.getFnAttributes();
+ return (LLVMAttribute)attr;
+}
+
+/*--.. Operations on parameters ............................................--*/
+
+unsigned LLVMCountParams(LLVMValueRef FnRef) {
+ // This function is strictly redundant to
+ // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef)))
+ return unwrap<Function>(FnRef)->arg_size();
+}
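+
+/* Illustrative sketch of the equivalence noted in the comment above, for a
+ hypothetical function value F. */
+static void ExampleCountParamsEquivalence(LLVMValueRef F) {
+ unsigned A = LLVMCountParams(F);
+ unsigned B = LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(F)));
+ (void)A; (void)B; /* A == B for any function value */
+}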
+
+void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::arg_iterator I = Fn->arg_begin(),
+ E = Fn->arg_end(); I != E; I++)
+ *ParamRefs++ = wrap(I);
+}
+
+LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
+ Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
+ while (index-- > 0)
+ AI++;
+ return wrap(AI);
+}
+
+LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
+ return wrap(unwrap<Argument>(V)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_begin();
+ if (I == Func->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_end();
+ if (I == Func->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (++I == A->getParent()->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (I == A->getParent()->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->addAttr(PA);
+}
+
+void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->removeAttr(PA);
+}
+
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Attributes attr = A->getParent()->getAttributes().getParamAttributes(
+ A->getArgNo()+1);
+ return (LLVMAttribute)attr;
+}
+
+
+void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
+ unwrap<Argument>(Arg)->addAttr(
+ Attribute::constructAlignmentFromInt(align));
+}
+
+/*--.. Operations on basic blocks ..........................................--*/
+
+LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
+ return wrap(static_cast<Value*>(unwrap(BB)));
+}
+
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
+ return isa<BasicBlock>(unwrap(Val));
+}
+
+LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
+ return wrap(unwrap<BasicBlock>(Val));
+}
+
+LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getParent());
+}
+
+unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
+ return unwrap<Function>(FnRef)->size();
+}
+
+void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
+ *BasicBlocksRefs++ = wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
+ return wrap(&unwrap<Function>(Fn)->getEntryBlock());
+}
+
+LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->begin();
+ if (I == Func->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->end();
+ if (I == Func->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (++I == Block->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (I == Block->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef FnRef,
+ const char *Name) {
+ return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
+ return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ BasicBlock *BB = unwrap(BBRef);
+ return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
+}
+
+void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->eraseFromParent();
+}
+
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
+/*--.. Operations on instructions ..........................................--*/
+
+LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->begin();
+ if (I == Block->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->end();
+ if (I == Block->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (++I == Instr->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (I == Instr->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+/*--.. Call and invoke instructions ........................................--*/
+
+unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->getCallingConv();
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->getCallingConv();
+ llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
+ return 0;
+}
+
+void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->setCallingConv(static_cast<CallingConv::ID>(CC));
+ llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
+}
+
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index, PA));
+}
+
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().removeAttr(index, PA));
+}
+
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+ unsigned align) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index,
+ Attribute::constructAlignmentFromInt(align)));
+}
+
+/*--.. Operations on call instructions (only) ..............................--*/
+
+LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
+ return unwrap<CallInst>(Call)->isTailCall();
+}
+
+void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
+ unwrap<CallInst>(Call)->setTailCall(isTailCall);
+}
+
+/*--.. Operations on phi nodes .............................................--*/
+
+void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
+ LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
+ PHINode *PhiVal = unwrap<PHINode>(PhiNode);
+ for (unsigned I = 0; I != Count; ++I)
+ PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
+}
+
+unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
+ return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
+}
+
+LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
+}
+
+LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
+}
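+
+/* Illustrative sketch: wiring a phi node's incoming edges with LLVMAddIncoming.
+ Phi, the values, and the blocks are all hypothetical; the two arrays must be
+ the same length. */
+static void ExampleAddIncoming(LLVMValueRef Phi, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMBasicBlockRef BB1,
+ LLVMBasicBlockRef BB2) {
+ LLVMValueRef Vals[2] = { V1, V2 };
+ LLVMBasicBlockRef Blocks[2] = { BB1, BB2 };
+ LLVMAddIncoming(Phi, Vals, Blocks, 2); /* Phi now has two incoming edges */
+}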
+
+
+/*===-- Instruction builders ----------------------------------------------===*/
+
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
+ return wrap(new IRBuilder<>(*unwrap(C)));
+}
+
+LLVMBuilderRef LLVMCreateBuilder(void) {
+ return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
+}
+
+void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
+ LLVMValueRef Instr) {
+ BasicBlock *BB = unwrap(Block);
+ // A null Instr means "insert at the end of the block".
+ Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
+ unwrap(Builder)->SetInsertPoint(BB, I);
+}
+
+void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ Instruction *I = unwrap<Instruction>(Instr);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+}
+
+void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
+ BasicBlock *BB = unwrap(Block);
+ unwrap(Builder)->SetInsertPoint(BB);
+}
+
+LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->GetInsertBlock());
+}
+
+void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
+ unwrap(Builder)->ClearInsertionPoint();
+}
+
+void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
+}
+
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
+}
+
+void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
+ delete unwrap(Builder);
+}
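+
+/* Illustrative sketch of the builder lifecycle: create, position at the end of
+ a block, emit instructions, dispose. F is a hypothetical function of type
+ i32 (i32, i32). */
+static void ExampleEmitAdd(LLVMValueRef F) {
+ LLVMBuilderRef B = LLVMCreateBuilder();
+ LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(F, "entry");
+ LLVMPositionBuilderAtEnd(B, Entry);
+ LLVMValueRef Sum = LLVMBuildAdd(B, LLVMGetParam(F, 0), LLVMGetParam(F, 1),
+ "sum");
+ LLVMBuildRet(B, Sum);
+ LLVMDisposeBuilder(B);
+}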
+
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation()
+ .getAsMDNode(unwrap(Builder)->getContext()));
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+
+/*--.. Instruction builders ................................................--*/
+
+LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateRetVoid());
+}
+
+LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
+ return wrap(unwrap(B)->CreateRet(unwrap(V)));
+}
+
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
+ unsigned N) {
+ return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
+}
+
+LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
+ return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
+}
+
+LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
+ return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
+}
+
+LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
+ LLVMBasicBlockRef Else, unsigned NumCases) {
+ return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
+}
+
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
+LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnwind());
+}
+
+LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnreachable());
+}
+
+void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
+ LLVMBasicBlockRef Dest) {
+ unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
+}
+
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
+/*--.. Arithmetic ..........................................................--*/
+
+LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS),
+ unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
+}
+
+/*--.. Memory ..............................................................--*/
+
+LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ 0, 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ unwrap(Val), 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->Insert(
+ CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
+}
+
+
+LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+}
+
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
+}
+
+LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name) {
+ return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalString(Str, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
+}
+
+/*--.. Casts ...............................................................--*/
+
+LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val),
+ unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
+ /*isSigned*/true, Name));
+}
+
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+/*--.. Comparisons .........................................................--*/
+
+LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*--.. Miscellaneous instructions ..........................................--*/
+
+LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMValueRef Then, LLVMValueRef Else,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
+ Name));
+}
+
+LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
+ LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
+ Name));
+}
+
+LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef EltVal, LLVMValueRef Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
+ unwrap(Index), Name));
+}
+
+LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMValueRef Mask,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
+ unwrap(Mask), Name));
+}
+
+LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ unsigned Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
+}
+
+LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ LLVMValueRef EltVal, unsigned Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
+ Index, Name));
+}
+
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+}
+
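The builder wrappers above map one-to-one onto IRBuilder calls, so a C-API front end composes them the same way C++ clients use IRBuilder. A minimal sketch of that usage (the function and variable names are illustrative; LLVMAddFunction, LLVMBuildAdd, LLVMBuildRet and the other helpers come from earlier sections of the same C API, not from the excerpt above):

  #include "llvm-c/Core.h"

  /* Build: i32 @add1(i32 %x) { ret i32 (add %x, 1) } */
  static LLVMValueRef buildAdd1(LLVMModuleRef M) {
    LLVMTypeRef I32 = LLVMInt32Type();
    LLVMTypeRef FnTy = LLVMFunctionType(I32, &I32, 1, /*IsVarArg=*/0);
    LLVMValueRef Fn = LLVMAddFunction(M, "add1", FnTy);
    LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(Fn, "entry");

    LLVMBuilderRef B = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(B, Entry);
    LLVMValueRef Sum = LLVMBuildAdd(B, LLVMGetParam(Fn, 0),
                                    LLVMConstInt(I32, 1, 0), "sum");
    LLVMBuildRet(B, Sum);
    LLVMDisposeBuilder(B);
    return Fn;
  }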
+
+/*===-- Module providers --------------------------------------------------===*/
+
+LLVMModuleProviderRef
+LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
+ return reinterpret_cast<LLVMModuleProviderRef>(M);
+}
+
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
+ delete unwrap(MP);
+}
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
+ const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
+ delete unwrap(MemBuf);
+}
+
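The two creation functions follow the usual C-API error convention: a zero return means success, while a non-zero return leaves a strdup'd message in *OutMessage that the caller must release. A hedged usage sketch (the wrapper name is invented for illustration):

  #include "llvm-c/Core.h"
  #include <stdio.h>
  #include <stdlib.h>

  static LLVMMemoryBufferRef readFileOrDie(const char *Path) {
    LLVMMemoryBufferRef Buf;
    char *Err = 0;
    if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Err)) {
      fprintf(stderr, "cannot read %s: %s\n", Path, Err);
      free(Err);   /* message was allocated with strdup() above */
      exit(1);
    }
    return Buf;    /* release later with LLVMDisposeMemoryBuffer() */
  }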
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+ return wrap(PassRegistry::getPassRegistry());
+}
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
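Taken together, a C-API client typically creates a FunctionPassManager for a module, runs it over each function, and disposes of it afterwards. A sketch under the assumption that llvm-c/Transforms/Scalar.h is available for pass registration; LLVMGetFirstFunction and LLVMGetNextFunction are the module iteration helpers from the same C API, and the function name here is illustrative:

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/Scalar.h"

  static void runFunctionPasses(LLVMModuleRef M) {
    LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
    LLVMAddPromoteMemoryToRegisterPass(FPM);   /* mem2reg, as an example */
    LLVMInitializeFunctionPassManager(FPM);
    for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
         F = LLVMGetNextFunction(F))
      LLVMRunFunctionPassManager(FPM, F);
    LLVMFinalizeFunctionPassManager(FPM);
    LLVMDisposePassManager(FPM);
  }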
diff --git a/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp b/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp
new file mode 100644
index 000000000000..d1275ff58caa
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/DebugInfoProbe.cpp
@@ -0,0 +1,225 @@
+//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DebugInfoProbe. This probe can be used by a pass
+// manager to analyze how the optimizer is treating debugging information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "debuginfoprobe"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include <set>
+#include <string>
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden,
+ cl::desc("Enable debug info probe"));
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl - This class implements an interface to monitor
+// how an optimization pass is preserving debugging information.
+
+namespace llvm {
+
+ class DebugInfoProbeImpl {
+ public:
+ DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {}
+ void initialize(StringRef PName, Function &F);
+ void finalize(Function &F);
+ void report();
+ private:
+ unsigned NumDbgLineLost, NumDbgValueLost;
+ std::string PassName;
+ Function *TheFn;
+ std::set<MDNode *> DbgVariables;
+ std::set<Instruction *> MissingDebugLoc;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ PassName = PName;
+
+ DbgVariables.clear();
+ MissingDebugLoc.clear();
+ TheFn = &F;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown())
+ MissingDebugLoc.insert(BI);
+ if (!isa<DbgInfoIntrinsic>(BI)) continue;
+ Value *Addr = NULL;
+ MDNode *Node = NULL;
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ Addr = DDI->getAddress();
+ Node = DDI->getVariable();
+ } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ Addr = DVI->getValue();
+ Node = DVI->getVariable();
+ }
+ if (Addr)
+ DbgVariables.insert(Node);
+ }
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbeImpl::report() {
+ if (!EnableDebugInfoProbe) return;
+ if (NumDbgLineLost || NumDbgValueLost) {
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ if (NumDbgLineLost)
+ *OutStream << NumDbgLineLost
+ << "\t times line number info lost by "
+ << PassName << "\n";
+ if (NumDbgValueLost)
+ *OutStream << NumDbgValueLost
+ << "\t times variable info lost by "
+ << PassName << "\n";
+ delete OutStream;
+ }
+ NumDbgLineLost = 0;
+ NumDbgValueLost = 0;
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeImpl::finalize(Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ assert (TheFn == &F && "Invalid function to measure!");
+
+ std::set<MDNode *>DbgVariables2;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown() &&
+ MissingDebugLoc.count(BI) == 0) {
+ ++NumDbgLineLost;
+ DEBUG(dbgs() << "DebugInfoProbe (" << PassName << "): --- ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ if (!isa<DbgInfoIntrinsic>(BI)) continue;
+ Value *Addr = NULL;
+ MDNode *Node = NULL;
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+ Addr = DDI->getAddress();
+ Node = DDI->getVariable();
+ } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ Addr = DVI->getValue();
+ Node = DVI->getVariable();
+ }
+ if (Addr)
+ DbgVariables2.insert(Node);
+ }
+
+ for (std::set<MDNode *>::iterator I = DbgVariables.begin(),
+ E = DbgVariables.end(); I != E; ++I) {
+ if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) {
+ DEBUG(dbgs()
+ << "DebugInfoProbe("
+ << PassName
+ << "): Losing dbg info for variable: ";
+ if (MDString *MDS = dyn_cast_or_null<MDString>(
+ (*I)->getOperand(2)))
+ dbgs() << MDS->getString();
+ else
+ dbgs() << "...";
+ dbgs() << "\n");
+ ++NumDbgValueLost;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+DebugInfoProbe::DebugInfoProbe() {
+ pImpl = new DebugInfoProbeImpl();
+}
+
+DebugInfoProbe::~DebugInfoProbe() {
+ delete pImpl;
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbe::initialize(StringRef PName, Function &F) {
+ pImpl->initialize(PName, F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbe::finalize(Function &F) {
+ pImpl->finalize(F);
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbe::report() {
+ pImpl->report();
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeInfo
+
+/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
+/// them.
+DebugInfoProbeInfo::~DebugInfoProbeInfo() {
+ if (!EnableDebugInfoProbe) return;
+ for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(),
+ E = Probes.end(); I != E; ++I) {
+ I->second->report();
+ delete I->second;
+ }
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeInfo::initialize(Pass *P, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ if (P->getAsPMDataManager())
+ return;
+
+ StringMapEntry<DebugInfoProbe *> &Entry =
+ Probes.GetOrCreateValue(P->getPassName());
+ DebugInfoProbe *&Probe = Entry.getValue();
+ if (!Probe)
+ Probe = new DebugInfoProbe();
+ Probe->initialize(P->getPassName(), F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeInfo::finalize(Pass *P, Function &F) {
+ if (!EnableDebugInfoProbe) return;
+ if (P->getAsPMDataManager())
+ return;
+ StringMapEntry<DebugInfoProbe *> &Entry =
+ Probes.GetOrCreateValue(P->getPassName());
+ DebugInfoProbe *&Probe = Entry.getValue();
+ assert (Probe && "DebugInfoProbe is not initialized!");
+ Probe->finalize(F);
+}
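DebugInfoProbeInfo is driven from the pass managers: initialize() is called with the pass and function before the pass runs, finalize() afterwards, and the destructor prints the per-pass totals. A rough sketch of that driving loop; the wrapper function below is invented for illustration, and the real call sites live inside the pass-manager implementation:

  #include "llvm/DebugInfoProbe.h"
  #include "llvm/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  // Snapshot debug info, run one function pass, then diff the snapshots.
  static void runPassWithProbe(DebugInfoProbeInfo &Probes, FunctionPass *P,
                               Function &F) {
    Probes.initialize(P, F);   // records !dbg locations and dbg.* variables
    P->runOnFunction(F);
    Probes.finalize(P, F);     // bumps the lost-line/lost-variable counters
  }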
diff --git a/contrib/llvm/lib/VMCore/DebugLoc.cpp b/contrib/llvm/lib/VMCore/DebugLoc.cpp
new file mode 100644
index 000000000000..4ff6b2cd80e8
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/DebugLoc.cpp
@@ -0,0 +1,344 @@
+//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "LLVMContextImpl.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DebugLoc Implementation
+//===----------------------------------------------------------------------===//
+
+MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) return 0;
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+}
+
+MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified. Zero is invalid.
+ if (ScopeIdx >= 0) return 0;
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+/// Return both the Scope and the InlinedAt values.
+void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+ const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) {
+ Scope = IA = 0;
+ return;
+ }
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ IA = 0;
+ return;
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+ IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
+ MDNode *Scope, MDNode *InlinedAt) {
+ DebugLoc Result;
+
+ // If no scope is available, this is an unknown location.
+ if (Scope == 0) return Result;
+
+ // Saturate line and col to "unknown".
+ if (Col > 255) Col = 0;
+ if (Line >= (1 << 24)) Line = 0;
+ Result.LineCol = Line | (Col << 24);
+
+ LLVMContext &Ctx = Scope->getContext();
+
+ // If there is no inlined-at location, use the ScopeRecords array.
+ if (InlinedAt == 0)
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
+ else
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
+ InlinedAt, 0);
+
+ return Result;
+}
+
+/// getAsMDNode - This method converts the compressed DebugLoc node into a
+/// DILocation-compatible MDNode.
+MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
+ if (isUnknown()) return 0;
+
+ MDNode *Scope, *IA;
+ getScopeAndInlinedAt(Scope, IA, Ctx);
+ assert(Scope && "If scope is null, this should be isUnknown()");
+
+ LLVMContext &Ctx2 = Scope->getContext();
+ const Type *Int32 = Type::getInt32Ty(Ctx2);
+ Value *Elts[] = {
+ ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
+ Scope, IA
+ };
+ return MDNode::get(Ctx2, Elts);
+}
+
+/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
+DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
+ if (N == 0 || N->getNumOperands() != 4) return DebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(2));
+ if (Scope == 0) return DebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(0)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(1)))
+ ColNo = Col->getZExtValue();
+
+ return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3)));
+}
+
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+ if (N == 0 || N->getNumOperands() < 3) return DebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(1));
+ if (Scope == 0) return DebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(2)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(3)))
+ ColNo = Col->getZExtValue();
+
+ return get(LineNo, ColNo, Scope, NULL);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG
+ if (!isUnknown()) {
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump(Ctx);
+ } else
+ dbgs() << "\n";
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// DenseMap specialization
+//===----------------------------------------------------------------------===//
+
+DebugLoc DenseMapInfo<DebugLoc>::getEmptyKey() {
+ return DebugLoc::getEmptyKey();
+}
+
+DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() {
+ return DebugLoc::getTombstoneKey();
+}
+
+unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(Key.LineCol);
+ ID.AddInteger(Key.ScopeIdx);
+ return ID.ComputeHash();
+}
+
+bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) {
+ return LHS == RHS;
+}
+
+//===----------------------------------------------------------------------===//
+// LLVMContextImpl Implementation
+//===----------------------------------------------------------------------===//
+
+int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
+ int ExistingIdx) {
+ // If we already have an entry for this scope, return it.
+ int &Idx = ScopeRecordIdx[Scope];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Otherwise add a new entry.
+
+  // Give ScopeRecords a reasonable initial size to avoid excessive
+  // reallocation early on.
+ if (ScopeRecords.empty())
+ ScopeRecords.reserve(128);
+
+  // The index is biased by 1 so that 0 can mean 'no entry'.
+ Idx = ScopeRecords.size()+1;
+ ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
+ return Idx;
+}
+
+int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
+ int ExistingIdx) {
+ // If we already have an entry, return it.
+ int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+  // Give ScopeInlinedAtRecords a reasonable initial size to avoid excessive
+  // reallocation early on.
+ if (ScopeInlinedAtRecords.empty())
+ ScopeInlinedAtRecords.reserve(128);
+
+ // Index is biased by 1 and negated.
+ Idx = -ScopeInlinedAtRecords.size()-1;
+ ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
+ DebugRecVH(IA, this, Idx)));
+ return Idx;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugRecVH Implementation
+//===----------------------------------------------------------------------===//
+
+/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
+/// to drop to null and we need to remove our entry from the DenseMap.
+void DebugRecVH::deleted() {
+  // If this is a non-canonical reference, just drop the value to null; we know
+  // it doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(0);
+ return;
+ }
+
+ MDNode *Cur = get();
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(Cur);
+ // Reset this VH to null and we're done.
+ setValPtr(0);
+ Idx = 0;
+ return;
+ }
+
+  // Otherwise, it is an entry in ScopeInlinedAtRecords; we don't know whether
+  // it is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+  // We do have an entry for this pair; nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to null. Drop both 'Idx' values to null to indicate that
+ // we're in non-canonical form now.
+ setValPtr(0);
+ Entry.first.Idx = Entry.second.Idx = 0;
+}
+
+void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
+ // If being replaced with a non-mdnode value (e.g. undef) handle this as if
+ // the mdnode got deleted.
+ MDNode *NewVal = dyn_cast<MDNode>(NewVa);
+ if (NewVal == 0) return deleted();
+
+  // If this is a non-canonical reference, just change it; we know it already
+  // doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(NewVa);
+ return;
+ }
+
+ MDNode *OldVal = get();
+ assert(OldVal != NewVa && "Node replaced with self?");
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(OldVal);
+ setValPtr(NewVal);
+
+ int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
+
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewEntry != Idx)
+ Idx = 0;
+ return;
+ }
+
+  // Otherwise, it is an entry in ScopeInlinedAtRecords; we don't know whether
+  // it is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+  // We do have an entry for this pair; nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to the new value.
+ setValPtr(NewVal);
+
+ int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
+ Entry.second.get(), Idx);
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewIdx != Idx) {
+ std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
+ Entry.first.Idx = Entry.second.Idx = 0;
+ }
+}
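In client code the compressed form is what travels on instructions: DebugLoc::get() builds it from a scope (and optional inlined-at) MDNode, and getAsMDNode() expands it back into the four-operand DILocation layout. An illustrative snippet, assuming the scope node has already been produced elsewhere (for example by DIBuilder); the function name and line/column values are made up:

  #include "llvm/Instruction.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Metadata.h"
  #include "llvm/Support/DebugLoc.h"
  using namespace llvm;

  static void tagWithLocation(Instruction *I, MDNode *Scope, LLVMContext &Ctx) {
    DebugLoc Loc = DebugLoc::get(/*Line=*/42, /*Col=*/7, Scope, /*InlinedAt=*/0);
    I->setDebugLoc(Loc);                         // stores the compressed indices
    MDNode *AsDILocation = Loc.getAsMDNode(Ctx); // !{i32 42, i32 7, scope, null}
    (void)AsDILocation;                          // e.g. attach as !dbg metadata
  }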
diff --git a/contrib/llvm/lib/VMCore/Dominators.cpp b/contrib/llvm/lib/VMCore/Dominators.cpp
new file mode 100644
index 000000000000..08b845ef9d6b
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Dominators.cpp
@@ -0,0 +1,106 @@
+//===- Dominators.cpp - Dominator Calculation -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators. Postdominators are available in libanalysis but are not
+// included in libvmcore, because they are not needed there. Forward dominators
+// are needed to support the Verifier pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyDomInfo = true;
+#else
+static bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
+
+//===----------------------------------------------------------------------===//
+// DominatorTree Implementation
+//===----------------------------------------------------------------------===//
+//
+// Provide public access to DominatorTree information. Implementation details
+// can be found in DominatorInternals.h.
+//
+//===----------------------------------------------------------------------===//
+
+TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
+TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
+
+char DominatorTree::ID = 0;
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true)
+
+bool DominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+void DominatorTree::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ Function &F = *getRoot()->getParent();
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "DominatorTree is not up to date!\nComputed:\n";
+ print(errs());
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
+}
+
+void DominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+// dominates - Return true if A dominates a use in B. This performs the
+// special checks necessary if A and B are in the same basic block.
+bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
+ const BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
+
+  // If A is an invoke instruction, its value is only available in its normal
+  // successor block.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(A))
+ BBA = II->getNormalDest();
+
+ if (BBA != BBB) return dominates(BBA, BBB);
+
+ // It is not possible to determine dominance between two PHI nodes
+ // based on their ordering.
+ if (isa<PHINode>(A) && isa<PHINode>(B))
+ return false;
+
+ // Loop through the basic block until we find A or B.
+ BasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ /*empty*/;
+
+ return &*I == A;
+}
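Client passes normally do not build the tree themselves; they declare a dependency and query the cached analysis. A schematic, deliberately do-nothing pass showing the usual pattern (the pass name is made up and registration boilerplate is omitted):

  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  namespace {
    struct DomQueryExample : public FunctionPass {
      static char ID;
      DomQueryExample() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<DominatorTree>();   // let the pass manager build it
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        DominatorTree &DT = getAnalysis<DominatorTree>();
        // The entry block dominates every reachable block; count them.
        BasicBlock &Entry = F.getEntryBlock();
        unsigned Dominated = 0;
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
          if (DT.dominates(&Entry, &*BB))
            ++Dominated;
        (void)Dominated;
        return false;                      // analysis only, IR is unchanged
      }
    };
  }
  char DomQueryExample::ID = 0;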
diff --git a/contrib/llvm/lib/VMCore/Function.cpp b/contrib/llvm/lib/VMCore/Function.cpp
new file mode 100644
index 000000000000..6536bcd0e2ed
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Function.cpp
@@ -0,0 +1,445 @@
+//===-- Function.cpp - Implement the Function and Argument classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Threading.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Argument, Function>;
+template class llvm::SymbolTableListTraits<BasicBlock, Function>;
+
+//===----------------------------------------------------------------------===//
+// Argument Implementation
+//===----------------------------------------------------------------------===//
+
+Argument::Argument(const Type *Ty, const Twine &Name, Function *Par)
+ : Value(Ty, Value::ArgumentVal) {
+ Parent = 0;
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (Par)
+ Par->getArgumentList().push_back(this);
+ setName(Name);
+}
+
+void Argument::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getArgNo - Return the index of this formal argument in its containing
+/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+unsigned Argument::getArgNo() const {
+ const Function *F = getParent();
+ assert(F && "Argument is not in a function");
+
+ Function::const_arg_iterator AI = F->arg_begin();
+ unsigned ArgIdx = 0;
+ for (; &*AI != this; ++AI)
+ ++ArgIdx;
+
+ return ArgIdx;
+}
+
+/// hasByValAttr - Return true if this argument has the byval attribute on it
+/// in its containing function.
+bool Argument::hasByValAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
+}
+
+unsigned Argument::getParamAlignment() const {
+ assert(getType()->isPointerTy() && "Only pointers have alignments");
+ return getParent()->getParamAlignment(getArgNo()+1);
+
+}
+
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+}
+
+/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+/// it in its containing function.
+bool Argument::hasNoAliasAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
+}
+
+/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
+/// on it in its containing function.
+bool Argument::hasNoCaptureAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
+}
+
+/// hasSRetAttr - Return true if this argument has the sret attribute on
+/// it in its containing function.
+bool Argument::hasStructRetAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ if (this != getParent()->arg_begin())
+ return false; // StructRet param must be first param
+ return getParent()->paramHasAttr(1, Attribute::StructRet);
+}
+
+/// addAttr - Add an Attribute to an argument
+void Argument::addAttr(Attributes attr) {
+ getParent()->addAttribute(getArgNo() + 1, attr);
+}
+
+/// removeAttr - Remove an Attribute from an argument
+void Argument::removeAttr(Attributes attr) {
+ getParent()->removeAttribute(getArgNo() + 1, attr);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Methods in Function
+//===----------------------------------------------------------------------===//
+
+LLVMContext &Function::getContext() const {
+ return getType()->getContext();
+}
+
+FunctionType *Function::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+bool Function::isVarArg() const {
+ return getFunctionType()->isVarArg();
+}
+
+Type *Function::getReturnType() const {
+ return getFunctionType()->getReturnType();
+}
+
+void Function::removeFromParent() {
+ getParent()->getFunctionList().remove(this);
+}
+
+void Function::eraseFromParent() {
+ getParent()->getFunctionList().erase(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Function Implementation
+//===----------------------------------------------------------------------===//
+
+Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &name, Module *ParentModule)
+ : GlobalValue(PointerType::getUnqual(Ty),
+ Value::FunctionVal, 0, 0, Linkage, name) {
+ assert(FunctionType::isValidReturnType(getReturnType()) &&
+ "invalid return type");
+ SymTab = new ValueSymbolTable();
+
+ // If the function has arguments, mark them as lazily built.
+ if (Ty->getNumParams())
+ setValueSubclassData(1); // Set the "has lazy arguments" bit.
+
+  // Make sure that we get added to a module
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getFunctionList().push_back(this);
+
+ // Ensure intrinsics have the right parameter attributes.
+ if (unsigned IID = getIntrinsicID())
+ setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+
+}
+
+Function::~Function() {
+ dropAllReferences(); // After this it is safe to delete instructions.
+
+ // Delete all of the method arguments and unlink from symbol table...
+ ArgumentList.clear();
+ delete SymTab;
+
+ // Remove the function from the on-the-side GC table.
+ clearGC();
+}
+
+void Function::BuildLazyArguments() const {
+ // Create the arguments vector, all arguments start out unnamed.
+ const FunctionType *FT = getFunctionType();
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ assert(!FT->getParamType(i)->isVoidTy() &&
+ "Cannot have void typed arguments!");
+ ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ }
+
+ // Clear the lazy arguments bit.
+ unsigned SDC = getSubclassDataFromValue();
+ const_cast<Function*>(this)->setValueSubclassData(SDC &= ~1);
+}
+
+size_t Function::arg_size() const {
+ return getFunctionType()->getNumParams();
+}
+bool Function::arg_empty() const {
+ return getFunctionType()->getNumParams() == 0;
+}
+
+void Function::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+// dropAllReferences() - This function causes all the subinstructions to "let
+// go" of all references that they are maintaining. This allows one to
+// 'delete' a whole class at a time, even though there may be circular
+// references... first all references are dropped, and all use counts go to
+// zero. Then everything is deleted for real. Note that no operations are
+// valid on an object that has "dropped all references", except operator
+// delete.
+//
+void Function::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ // Delete all basic blocks. They are now unused, except possibly by
+ // blockaddresses, but BasicBlock's destructor takes care of those.
+ while (!BasicBlocks.empty())
+ BasicBlocks.begin()->eraseFromParent();
+}
+
+void Function::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void Function::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+// Maintain the GC name for each function in an on-the-side table. This saves
+// allocating an additional word in Function for programs which do not use GC
+// (i.e., most programs) at the cost of increased overhead for clients which do
+// use GC.
+static DenseMap<const Function*,PooledStringPtr> *GCNames;
+static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
+
+bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return GCNames && GCNames->count(this);
+}
+
+const char *Function::getGC() const {
+ assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return *(*GCNames)[this];
+}
+
+void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (!GCNamePool)
+ GCNamePool = new StringPool();
+ if (!GCNames)
+ GCNames = new DenseMap<const Function*,PooledStringPtr>();
+ (*GCNames)[this] = GCNamePool->intern(Str);
+}
+
+void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (GCNames) {
+ GCNames->erase(this);
+ if (GCNames->empty()) {
+ delete GCNames;
+ GCNames = 0;
+ if (GCNamePool->empty()) {
+ delete GCNamePool;
+ GCNamePool = 0;
+ }
+ }
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a Function) from the Function Src to this one.
+void Function::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<Function>(Src) && "Expected a Function!");
+ GlobalValue::copyAttributesFrom(Src);
+ const Function *SrcF = cast<Function>(Src);
+ setCallingConv(SrcF->getCallingConv());
+ setAttributes(SrcF->getAttributes());
+ if (SrcF->hasGC())
+ setGC(SrcF->getGC());
+ else
+ clearGC();
+}
+
+/// getIntrinsicID - This method returns the ID number of the specified
+/// function, or Intrinsic::not_intrinsic if the function is not an
+/// intrinsic, or if the pointer is null. This value is always defined to be
+/// zero to allow easy checking for whether a function is intrinsic or not. The
+/// particular intrinsic functions which correspond to this value are defined in
+/// llvm/Intrinsics.h.
+///
+unsigned Function::getIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName)
+ return 0;
+ unsigned Len = ValName->getKeyLength();
+ const char *Name = ValName->getKeyData();
+
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "llvm/Intrinsics.gen"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ static const char * const Table[] = {
+ "not_intrinsic",
+#define GET_INTRINSIC_NAME_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+ if (Tys.empty())
+ return Table[id];
+ std::string Result(Table[id]);
+ for (unsigned i = 0; i < Tys.size(); ++i) {
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
+ Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
+ EVT::getEVT(PTyp->getElementType()).getEVTString();
+ }
+ else if (Tys[i])
+ Result += "." + EVT::getEVT(Tys[i]).getEVTString();
+ }
+ return Result;
+}
+
+const FunctionType *Intrinsic::getType(LLVMContext &Context,
+ ID id, ArrayRef<Type*> Tys) {
+ const Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+bool Intrinsic::isOverloaded(ID id) {
+ static const bool OTable[] = {
+ false,
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ return OTable[id];
+}
+
+/// This defines the "Intrinsic::getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
+  // There can never be multiple globals with the same name but different types,
+  // because intrinsics must have a specific type.
+ return
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys)));
+}
+
+// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "llvm/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// hasAddressTaken - returns true if there are any uses of this function
+/// other than direct calls or invokes to it.
+bool Function::hasAddressTaken(const User* *PutOffender) const {
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const User *U = *I;
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(I))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ }
+ return false;
+}
+
+/// callsFunctionThatReturnsTwice - Return true if the function has a call to
+/// setjmp or another function that gcc recognizes as "returning twice".
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+/// FIXME: Is the above FIXME valid?
+bool Function::callsFunctionThatReturnsTwice() const {
+ const Module *M = this->getParent();
+ static const char *ReturnsTwiceFns[] = {
+ "_setjmp",
+ "setjmp",
+ "sigsetjmp",
+ "setjmp_syscall",
+ "savectx",
+ "qsetjmp",
+ "vfork",
+ "getcontext"
+ };
+
+ for (unsigned I = 0; I < array_lengthof(ReturnsTwiceFns); ++I)
+ if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+ if (!Callee->use_empty())
+ for (Value::const_use_iterator
+ I = Callee->use_begin(), E = Callee->use_end();
+ I != E; ++I)
+ if (const CallInst *CI = dyn_cast<CallInst>(*I))
+ if (CI->getParent()->getParent() == this)
+ return true;
+ }
+
+ return false;
+}
+
+// vim: sw=2 ai
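A short, assumed-typical consumer of the helpers above: the Argument predicates just forward to the containing function's attribute list, and Intrinsic::getDeclaration() materializes the mangled intrinsic the same way IRBuilder does for memcpy later in this patch. The helper names and the I8Ptr/SizeTy parameters below are invented for illustration:

  #include "llvm/Function.h"
  #include "llvm/Intrinsics.h"
  #include "llvm/Module.h"
  using namespace llvm;

  // True if the first formal parameter carries the 'byval' attribute.
  static bool firstParamIsByVal(const Function &F) {
    return !F.arg_empty() && F.arg_begin()->hasByValAttr();
  }

  // Declare (or look up) llvm.memcpy overloaded on <dst*, src*, size> types.
  static Function *getMemCpyDecl(Module &M, Type *I8Ptr, Type *SizeTy) {
    Type *Tys[] = { I8Ptr, I8Ptr, SizeTy };
    return Intrinsic::getDeclaration(&M, Intrinsic::memcpy, Tys);
  }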
diff --git a/contrib/llvm/lib/VMCore/GVMaterializer.cpp b/contrib/llvm/lib/VMCore/GVMaterializer.cpp
new file mode 100644
index 000000000000..f77a9c908d54
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/GVMaterializer.cpp
@@ -0,0 +1,18 @@
+//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Minimal implementation of the abstract interface for materializing
+// GlobalValues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/GVMaterializer.h"
+using namespace llvm;
+
+GVMaterializer::~GVMaterializer() {}
diff --git a/contrib/llvm/lib/VMCore/Globals.cpp b/contrib/llvm/lib/VMCore/Globals.cpp
new file mode 100644
index 000000000000..db008e09d1c8
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Globals.cpp
@@ -0,0 +1,263 @@
+//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GlobalValue & GlobalVariable classes for the VMCore
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Class
+//===----------------------------------------------------------------------===//
+
+bool GlobalValue::isMaterializable() const {
+ return getParent() && getParent()->isMaterializable(this);
+}
+bool GlobalValue::isDematerializable() const {
+ return getParent() && getParent()->isDematerializable(this);
+}
+bool GlobalValue::Materialize(std::string *ErrInfo) {
+ return getParent()->Materialize(this, ErrInfo);
+}
+void GlobalValue::Dematerialize() {
+ getParent()->Dematerialize(this);
+}
+
+/// Override destroyConstant to make sure it doesn't get called on
+/// GlobalValues because they shouldn't be treated like other constants.
+void GlobalValue::destroyConstant() {
+ llvm_unreachable("You can't GV->destroyConstant()!");
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalValue) from the GlobalValue Src to this one.
+void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
+ setAlignment(Src->getAlignment());
+ setSection(Src->getSection());
+ setVisibility(Src->getVisibility());
+ setUnnamedAddr(Src->hasUnnamedAddr());
+}
+
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool GlobalValue::isDeclaration() const {
+ // Globals are definitions if they have an initializer.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return GV->getNumOperands() == 0;
+
+ // Functions are definitions if they have a body.
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->empty();
+
+ // Aliases are always definitions.
+ assert(isa<GlobalAlias>(this));
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalVariable Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal, const Twine &Name,
+ bool ThreadLocal, unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+}
+
+GlobalVariable::GlobalVariable(Module &M, const Type *Ty, bool constant,
+ LinkageTypes Link, Constant *InitVal,
+ const Twine &Name,
+ GlobalVariable *Before, bool ThreadLocal,
+ unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (Before)
+ Before->getParent()->getGlobalList().insert(Before, this);
+ else
+ M.getGlobalList().push_back(this);
+}
+
+void GlobalVariable::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalVariable::removeFromParent() {
+ getParent()->getGlobalList().remove(this);
+}
+
+void GlobalVariable::eraseFromParent() {
+ getParent()->getGlobalList().erase(this);
+}
+
+void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ // If you call this, then you better know this GVar has a constant
+ // initializer worth replacing. Enforce that here.
+ assert(getNumOperands() == 1 &&
+ "Attempt to replace uses of Constants on a GVar with no initializer");
+
+ // And, since you know it has an initializer, the From value better be
+ // the initializer :)
+ assert(getOperand(0) == From &&
+ "Attempt to replace wrong constant initializer in GVar");
+
+ // And, you better have a constant for the replacement value
+ assert(isa<Constant>(To) &&
+ "Attempt to replace GVar initializer with non-constant");
+
+ // Okay, preconditions out of the way, replace the constant initializer.
+ this->setOperand(0, cast<Constant>(To));
+}
+
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalVariable) from the GlobalVariable Src to this one.
+void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
+ GlobalValue::copyAttributesFrom(Src);
+ const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
+ setThreadLocal(SrcVar->isThreadLocal());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalAlias Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link,
+ const Twine &Name, Constant* aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ LeakDetector::addGarbageObject(this);
+
+ if (aliasee)
+ assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
+ Op<0>() = aliasee;
+
+ if (ParentModule)
+ ParentModule->getAliasList().push_back(this);
+}
+
+void GlobalAlias::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalAlias::removeFromParent() {
+ getParent()->getAliasList().remove(this);
+}
+
+void GlobalAlias::eraseFromParent() {
+ getParent()->getAliasList().erase(this);
+}
+
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
+
+ setOperand(0, Aliasee);
+}
+
+const GlobalValue *GlobalAlias::getAliasedGlobal() const {
+ const Constant *C = getAliasee();
+ if (C == 0) return 0;
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+
+ const ConstantExpr *CE = cast<ConstantExpr>(C);
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+
+ return dyn_cast<GlobalValue>(CE->getOperand(0));
+}
+
+const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
+ SmallPtrSet<const GlobalValue*, 3> Visited;
+
+ // Check if we need to stop early.
+ if (stopOnWeak && mayBeOverridden())
+ return this;
+
+ const GlobalValue *GV = getAliasedGlobal();
+ Visited.insert(GV);
+
+ // Iterate over aliasing chain, stopping on weak alias if necessary.
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (stopOnWeak && GA->mayBeOverridden())
+ break;
+
+ GV = GA->getAliasedGlobal();
+
+ if (!Visited.insert(GV))
+ return 0;
+ }
+
+ return GV;
+}
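A hedged example of the constructors implemented above: create an internal i32 counter in a module and point an alias at it, so that resolveAliasedGlobal() walks back to the variable. The names and linkage choices are illustrative only:

  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/GlobalAlias.h"
  #include "llvm/GlobalVariable.h"
  #include "llvm/Module.h"
  using namespace llvm;

  static GlobalVariable *makeCounter(Module &M) {
    Type *I32 = Type::getInt32Ty(M.getContext());
    GlobalVariable *GV =
        new GlobalVariable(M, I32, /*isConstant=*/false,
                           GlobalValue::InternalLinkage,
                           ConstantInt::get(I32, 0), "counter");
    // The alias type must match the aliasee type (i32* here).
    new GlobalAlias(GV->getType(), GlobalValue::ExternalLinkage,
                    "counter_alias", GV, &M);
    return GV;
  }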
diff --git a/contrib/llvm/lib/VMCore/IRBuilder.cpp b/contrib/llvm/lib/VMCore/IRBuilder.cpp
new file mode 100644
index 000000000000..ffe961fee7c2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/IRBuilder.cpp
@@ -0,0 +1,149 @@
+//===---- IRBuilder.cpp - Builder for LLVM Instrs -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+using namespace llvm;
+
+/// CreateGlobalString - Make a new global variable with an initializer that
+/// has an array-of-i8 type filled in with the nul-terminated string value
+/// specified. If Name is given, it becomes the name of the global variable
+/// created.
+Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+ Constant *StrConstant = ConstantArray::get(Context, Str, true);
+ Module &M = *BB->getParent()->getParent();
+ GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
+ true, GlobalValue::InternalLinkage,
+ StrConstant, "", 0, false);
+ GV->setName(Name);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+ assert(BB && BB->getParent() && "No current function!");
+ return BB->getParent()->getReturnType();
+}
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+ const PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getElementType()->isIntegerTy(8))
+ return Ptr;
+
+ // Otherwise, we need to insert a bitcast.
+ PT = getInt8PtrTy(PT->getAddressSpace());
+ BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+ BB->getInstList().insert(InsertPt, BCI);
+ SetInstDebugLocation(BCI);
+ return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, "");
+ Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
+ Builder->SetInstDebugLocation(CI);
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Ptr = getCastedInt8PtrValue(Ptr);
+ Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
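+
+// Usage sketch (assuming a Builder with a valid insert point and the usual
+// getInt8/getInt64 helpers; Dst may be any pointer value):
+//
+//   Builder.CreateMemSet(Dst, Builder.getInt8(0), Builder.getInt64(128),
+//                        /*Align=*/16, /*isVolatile=*/false, /*TBAATag=*/0);
+//
+// Dst is bitcast to i8* automatically and the call targets llvm.memset.*.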
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.start only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.start requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
+ return createCallHelper(TheFn, Ops, this);
+}
+
+CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.end only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.end requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
+ return createCallHelper(TheFn, Ops, this);
+}
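+
+// Sketch of bracketing a stack object with lifetime markers (AI is assumed
+// to be an alloca covering 64 bytes; Builder has a valid insert point):
+//
+//   Builder.CreateLifetimeStart(AI, Builder.getInt64(64));
+//   // ... uses of AI ...
+//   Builder.CreateLifetimeEnd(AI, Builder.getInt64(64));
+//
+// Passing a null Size marks the whole object (emitted as i64 -1).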
diff --git a/contrib/llvm/lib/VMCore/InlineAsm.cpp b/contrib/llvm/lib/VMCore/InlineAsm.cpp
new file mode 100644
index 000000000000..4a03b395e98e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/InlineAsm.cpp
@@ -0,0 +1,294 @@
+//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InlineAsm class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InlineAsm.h"
+#include "ConstantsContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/DerivedTypes.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() {
+}
+
+
+InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack) {
+ InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack);
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
+}
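+
+// A small usage sketch (Ctx is an assumed LLVMContext; the asm and
+// constraint strings must of course be target-appropriate):
+//
+//   FunctionType *FTy =
+//       FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
+//   InlineAsm *IA = InlineAsm::get(FTy, "nop", "",
+//                                  /*hasSideEffects=*/true,
+//                                  /*isAlignStack=*/false);
+//   // IA can then be used as the callee of a call instruction.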
+
+InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects,
+ bool isAlignStack)
+ : Value(Ty, Value::InlineAsmVal),
+ AsmString(asmString),
+ Constraints(constraints), HasSideEffects(hasSideEffects),
+ IsAlignStack(isAlignStack) {
+
+ // Do various checks on the constraint string and type.
+ assert(Verify(getFunctionType(), constraints) &&
+ "Function type not legal for constraints!");
+}
+
+void InlineAsm::destroyConstant() {
+ getType()->getContext().pImpl->InlineAsms.remove(this);
+ delete this;
+}
+
+FunctionType *InlineAsm::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+/// Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+ Type(isInput), isEarlyClobber(false),
+ MatchingInput(-1), isCommutative(false),
+ isIndirect(false), isMultipleAlternative(false),
+ currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+ Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+ MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+ isIndirect(other.isIndirect), Codes(other.Codes),
+ isMultipleAlternative(other.isMultipleAlternative),
+ multipleAlternatives(other.multipleAlternatives),
+ currentAlternativeIndex(other.currentAlternativeIndex) {
+}
+
+/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
+/// fields in this structure. If the constraint string is not understood,
+/// return true, otherwise return false.
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
+ InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
+ StringRef::iterator I = Str.begin(), E = Str.end();
+ unsigned multipleAlternativeCount = Str.count('|') + 1;
+ unsigned multipleAlternativeIndex = 0;
+ ConstraintCodeVector *pCodes = &Codes;
+
+ // Initialize
+ isMultipleAlternative = multipleAlternativeCount > 1;
+ if (isMultipleAlternative) {
+ multipleAlternatives.resize(multipleAlternativeCount);
+ pCodes = &multipleAlternatives[0].Codes;
+ }
+ Type = isInput;
+ isEarlyClobber = false;
+ MatchingInput = -1;
+ isCommutative = false;
+ isIndirect = false;
+ currentAlternativeIndex = 0;
+
+ // Parse prefixes.
+ if (*I == '~') {
+ Type = isClobber;
+ ++I;
+ } else if (*I == '=') {
+ ++I;
+ Type = isOutput;
+ }
+
+ if (*I == '*') {
+ isIndirect = true;
+ ++I;
+ }
+
+ if (I == E) return true; // Just a prefix, like "==" or "~".
+
+ // Parse the modifiers.
+ bool DoneWithModifiers = false;
+ while (!DoneWithModifiers) {
+ switch (*I) {
+ default:
+ DoneWithModifiers = true;
+ break;
+ case '&': // Early clobber.
+ if (Type != isOutput || // Cannot early clobber anything but output.
+ isEarlyClobber) // Reject &&&&&&
+ return true;
+ isEarlyClobber = true;
+ break;
+ case '%': // Commutative.
+ if (Type == isClobber || // Cannot commute clobbers.
+ isCommutative) // Reject %%%%%
+ return true;
+ isCommutative = true;
+ break;
+ case '#': // Comment.
+ case '*': // Register preferencing.
+ return true; // Not supported.
+ }
+
+ if (!DoneWithModifiers) {
+ ++I;
+ if (I == E) return true; // Just prefixes and modifiers!
+ }
+ }
+
+ // Parse the various constraints.
+ while (I != E) {
+ if (*I == '{') { // Physical register reference.
+ // Find the end of the register name.
+ StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
+ if (ConstraintEnd == E) return true; // "{foo"
+ pCodes->push_back(std::string(I, ConstraintEnd+1));
+ I = ConstraintEnd+1;
+ } else if (isdigit(*I)) { // Matching Constraint
+ // Maximal munch numbers.
+ StringRef::iterator NumStart = I;
+ while (I != E && isdigit(*I))
+ ++I;
+ pCodes->push_back(std::string(NumStart, I));
+ unsigned N = atoi(pCodes->back().c_str());
+ // Check that this is a valid matching constraint!
+ if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
+ Type != isInput)
+ return true; // Invalid constraint number.
+
+ // If Operand N already has a matching input, reject this. An output
+ // can't be constrained to the same value as multiple inputs.
+ if (isMultipleAlternative) {
+ InlineAsm::SubConstraintInfo &scInfo =
+ ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+ if (scInfo.MatchingInput != -1)
+ return true;
+ // Note that operand #n has a matching input.
+ scInfo.MatchingInput = ConstraintsSoFar.size();
+ } else {
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ }
+ } else if (*I == '|') {
+ multipleAlternativeIndex++;
+ pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+ ++I;
+ } else if (*I == '^') {
+ // Multi-letter constraint
+ // FIXME: For now assuming these are 2-character constraints.
+ pCodes->push_back(std::string(I+1, I+3));
+ I += 3;
+ } else {
+ // Single letter constraint.
+ pCodes->push_back(std::string(I, I+1));
+ ++I;
+ }
+ }
+
+ return false;
+}
+
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+ if (index < multipleAlternatives.size()) {
+ currentAlternativeIndex = index;
+ InlineAsm::SubConstraintInfo &scInfo =
+ multipleAlternatives[currentAlternativeIndex];
+ MatchingInput = scInfo.MatchingInput;
+ Codes = scInfo.Codes;
+ }
+}
+
+InlineAsm::ConstraintInfoVector
+InlineAsm::ParseConstraints(StringRef Constraints) {
+ ConstraintInfoVector Result;
+
+ // Scan the constraints string.
+ for (StringRef::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
+ ConstraintInfo Info;
+
+ // Find the end of this constraint.
+ StringRef::iterator ConstraintEnd = std::find(I, E, ',');
+
+ if (ConstraintEnd == I || // Empty constraint like ",,"
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
+ Result.clear(); // Erroneous constraint?
+ break;
+ }
+
+ Result.push_back(Info);
+
+ // ConstraintEnd may be either the next comma or the end of the string. In
+ // the former case, we skip the comma.
+ I = ConstraintEnd;
+ if (I != E) {
+ ++I;
+ if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ }
+ }
+
+ return Result;
+}
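+
+// Worked example, derived from the parser above: for the constraint string
+// "=r,r,~{memory}", ParseConstraints returns three ConstraintInfo entries:
+//   [0] Type == isOutput,  Codes == {"r"}
+//   [1] Type == isInput,   Codes == {"r"}
+//   [2] Type == isClobber, Codes == {"{memory}"}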
+
+/// Verify - Verify that the specified constraint string is reasonable for the
+/// specified function type, and otherwise validate the constraint string.
+bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
+ if (Ty->isVarArg()) return false;
+
+ ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
+
+ // Error parsing constraints.
+ if (Constraints.empty() && !ConstStr.empty()) return false;
+
+ unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
+ unsigned NumIndirect = 0;
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ switch (Constraints[i].Type) {
+ case InlineAsm::isOutput:
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
+ return false; // outputs before inputs and clobbers.
+ if (!Constraints[i].isIndirect) {
+ ++NumOutputs;
+ break;
+ }
+ ++NumIndirect;
+ // FALLTHROUGH for Indirect Outputs.
+ case InlineAsm::isInput:
+ if (NumClobbers) return false; // inputs before clobbers.
+ ++NumInputs;
+ break;
+ case InlineAsm::isClobber:
+ ++NumClobbers;
+ break;
+ }
+ }
+
+ switch (NumOutputs) {
+ case 0:
+ if (!Ty->getReturnType()->isVoidTy()) return false;
+ break;
+ case 1:
+ if (Ty->getReturnType()->isStructTy()) return false;
+ break;
+ default:
+ const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ if (STy == 0 || STy->getNumElements() != NumOutputs)
+ return false;
+ break;
+ }
+
+ if (Ty->getNumParams() != NumInputs) return false;
+ return true;
+}
+
diff --git a/contrib/llvm/lib/VMCore/Instruction.cpp b/contrib/llvm/lib/VMCore/Instruction.cpp
new file mode 100644
index 000000000000..02c075743959
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Instruction.cpp
@@ -0,0 +1,414 @@
+//===-- Instruction.cpp - Implement the Instruction class -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Instruction class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+ // If requested, insert this instruction into a basic block...
+ if (InsertBefore) {
+ assert(InsertBefore->getParent() &&
+ "Instruction to insert before is not in a basic block!");
+ InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ }
+}
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+ // Append this instruction to the end of the basic block.
+ assert(InsertAtEnd && "Basic block to append to may not be NULL!");
+ InsertAtEnd->getInstList().push_back(this);
+}
+
+
+// Out of line virtual method, so the vtable, etc has a home.
+Instruction::~Instruction() {
+ assert(Parent == 0 && "Instruction still linked in the program!");
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
+}
+
+
+void Instruction::setParent(BasicBlock *P) {
+ if (getParent()) {
+ if (!P) LeakDetector::addGarbageObject(this);
+ } else {
+ if (P) LeakDetector::removeGarbageObject(this);
+ }
+
+ Parent = P;
+}
+
+void Instruction::removeFromParent() {
+ getParent()->getInstList().remove(this);
+}
+
+void Instruction::eraseFromParent() {
+ getParent()->getInstList().erase(this);
+}
+
+/// insertBefore - Insert an unlinked instruction into a basic block
+/// immediately before the specified instruction.
+void Instruction::insertBefore(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insert(InsertPos, this);
+}
+
+/// insertAfter - Insert an unlinked instruction into a basic block
+/// immediately after the specified instruction.
+void Instruction::insertAfter(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+}
+
+/// moveBefore - Unlink this instruction from its current basic block and
+/// insert it into the basic block that MovePos lives in, right before
+/// MovePos.
+void Instruction::moveBefore(Instruction *MovePos) {
+ MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
+ this);
+}
+
+
+const char *Instruction::getOpcodeName(unsigned OpCode) {
+ switch (OpCode) {
+ // Terminators
+ case Ret: return "ret";
+ case Br: return "br";
+ case Switch: return "switch";
+ case IndirectBr: return "indirectbr";
+ case Invoke: return "invoke";
+ case Unwind: return "unwind";
+ case Unreachable: return "unreachable";
+
+ // Standard binary operators...
+ case Add: return "add";
+ case FAdd: return "fadd";
+ case Sub: return "sub";
+ case FSub: return "fsub";
+ case Mul: return "mul";
+ case FMul: return "fmul";
+ case UDiv: return "udiv";
+ case SDiv: return "sdiv";
+ case FDiv: return "fdiv";
+ case URem: return "urem";
+ case SRem: return "srem";
+ case FRem: return "frem";
+
+ // Logical operators...
+ case And: return "and";
+ case Or : return "or";
+ case Xor: return "xor";
+
+ // Memory instructions...
+ case Alloca: return "alloca";
+ case Load: return "load";
+ case Store: return "store";
+ case GetElementPtr: return "getelementptr";
+
+ // Convert instructions...
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+
+ // Other instructions...
+ case ICmp: return "icmp";
+ case FCmp: return "fcmp";
+ case PHI: return "phi";
+ case Select: return "select";
+ case Call: return "call";
+ case Shl: return "shl";
+ case LShr: return "lshr";
+ case AShr: return "ashr";
+ case VAArg: return "va_arg";
+ case ExtractElement: return "extractelement";
+ case InsertElement: return "insertelement";
+ case ShuffleVector: return "shufflevector";
+ case ExtractValue: return "extractvalue";
+ case InsertValue: return "insertvalue";
+
+ default: return "<Invalid operator> ";
+ }
+
+ return 0;
+}
+
+/// isIdenticalTo - Return true if the specified instruction is exactly
+/// identical to the current one. This means that all operands match and any
+/// extra information (e.g. load is volatile) agrees.
+bool Instruction::isIdenticalTo(const Instruction *I) const {
+ return isIdenticalToWhenDefined(I) &&
+ SubclassOptionalData == I->SubclassOptionalData;
+}
+
+/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+/// ignores the SubclassOptionalData flags, which specify conditions
+/// under which the instruction's result is undefined.
+bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != I->getOperand(i))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+
+ return true;
+}
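+
+// Illustrative example: these two instructions compare equal under
+// isIdenticalToWhenDefined but not under isIdenticalTo, because the nsw flag
+// is kept in SubclassOptionalData:
+//   %a = add nsw i32 %x, %y
+//   %b = add i32 %x, %y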
+
+// isSameOperationAs
+// This should be kept in sync with isEquivalentOperation in
+// lib/Transforms/IPO/MergeFunctions.cpp.
+bool Instruction::isSameOperationAs(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i)->getType() != I->getOperand(i)->getType())
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() ==
+ cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+
+ return true;
+}
+
+/// isUsedOutsideOfBlock - Return true if there are any uses of this
+/// instruction outside of the specified block. Note that PHI nodes are
+/// considered to evaluate their operands in the corresponding predecessor
+/// block.
+bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ // A PHI node uses its values in the corresponding predecessor block. For
+ // other instructions, just check whether the parent of the use matches up.
+ const User *U = *UI;
+ const PHINode *PN = dyn_cast<PHINode>(U);
+ if (PN == 0) {
+ if (cast<Instruction>(U)->getParent() != BB)
+ return true;
+ continue;
+ }
+
+ if (PN->getIncomingBlock(UI) != BB)
+ return true;
+ }
+ return false;
+}
+
+/// mayReadFromMemory - Return true if this instruction may read memory.
+///
+bool Instruction::mayReadFromMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::VAArg:
+ case Instruction::Load:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotAccessMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::Store:
+ return cast<StoreInst>(this)->isVolatile();
+ }
+}
+
+/// mayWriteToMemory - Return true if this instruction may modify memory.
+///
+bool Instruction::mayWriteToMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Store:
+ case Instruction::VAArg:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->onlyReadsMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::Load:
+ return cast<LoadInst>(this)->isVolatile();
+ }
+}
+
+/// mayThrow - Return true if this instruction may throw an exception.
+///
+bool Instruction::mayThrow() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotThrow();
+ return false;
+}
+
+/// isAssociative - Return true if the instruction is associative:
+///
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
+///
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
+///
+bool Instruction::isAssociative(unsigned Opcode) {
+ return Opcode == And || Opcode == Or || Opcode == Xor ||
+ Opcode == Add || Opcode == Mul;
+}
+
+/// isCommutative - Return true if the instruction is commutative:
+///
+/// Commutative operators satisfy: (x op y) === (y op x)
+///
+/// In LLVM, these are the associative operators plus FAdd and FMul, when
+/// applied to any type.
+///
+bool Instruction::isCommutative(unsigned op) {
+ switch (op) {
+ case Add:
+ case FAdd:
+ case Mul:
+ case FMul:
+ case And:
+ case Or:
+ case Xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Instruction::isSafeToSpeculativelyExecute() const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (getOpcode()) {
+ default:
+ return true;
+ case UDiv:
+ case URem: {
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue();
+ }
+ case SDiv:
+ case SRem: {
+ // x / y is undefined if y == 0, and can overflow if y == -1 (INT_MIN / -1),
+ // but calculations like x / 3 are safe.
+ ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
+ return Op && !Op->isNullValue() && !Op->isAllOnesValue();
+ }
+ case Load: {
+ const LoadInst *LI = cast<LoadInst>(this);
+ if (LI->isVolatile())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Call:
+ return false; // The called function could have undefined behavior or
+ // side-effects.
+ // FIXME: We should special-case some intrinsics (bswap,
+ // overflow-checking arithmetic, etc.)
+ case VAArg:
+ case Alloca:
+ case Invoke:
+ case PHI:
+ case Store:
+ case Ret:
+ case Br:
+ case IndirectBr:
+ case Switch:
+ case Unwind:
+ case Unreachable:
+ return false; // Misc instructions which have effects
+ }
+}
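+
+// Quick examples of the policy above (informal):
+//   udiv i32 %x, 3   -- safe to speculate: the divisor is a non-zero constant
+//   udiv i32 %x, %y  -- not safe: %y may be zero at run time
+//   a volatile load  -- never speculated
+// Calls are rejected wholesale, even intrinsics that cannot trap (see the
+// FIXME above).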
+
+Instruction *Instruction::clone() const {
+ Instruction *New = clone_impl();
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (!hasMetadata())
+ return New;
+
+ // Otherwise, enumerate and copy over metadata from the old instruction to the
+ // new one.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
+ getAllMetadataOtherThanDebugLoc(TheMDs);
+ for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
+ New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+
+ New->setDebugLoc(getDebugLoc());
+ return New;
+}
diff --git a/contrib/llvm/lib/VMCore/Instructions.cpp b/contrib/llvm/lib/VMCore/Instructions.cpp
new file mode 100644
index 000000000000..9baad09cb272
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Instructions.cpp
@@ -0,0 +1,3187 @@
+//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements all of the non-inline methods for the LLVM instruction
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CallSite Class
+//===----------------------------------------------------------------------===//
+
+User::op_iterator CallSite::getCallee() const {
+ Instruction *II(getInstruction());
+ return isCall()
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
+}
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+TerminatorInst::~TerminatorInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+UnaryInstruction::~UnaryInstruction() {
+}
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// areInvalidOperands - Return a string if the specified operands are invalid
+/// for a select operation, otherwise return null.
+const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
+ if (Op1->getType() != Op2->getType())
+ return "both values to select must have same type";
+
+ if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ // Vector select.
+ if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
+ return "vector select condition element type must be i1";
+ const VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ if (ET == 0)
+ return "selected values for vector select must be vectors";
+ if (ET->getNumElements() != VT->getNumElements())
+ return "vector select requires selected vectors to have "
+ "the same vector length as select condition";
+ } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
+ return "select condition must be i1 or <n x i1>";
+ }
+ return 0;
+}
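+
+// Examples of the checks above (informal):
+//   select i1 %c, i32 %a, i32 %b                    -- valid
+//   select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b  -- valid vector select
+//   select i32 %c, i32 %a, i32 %b                   -- rejected: condition
+//                                                      must be i1 or <n x i1>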
+
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+PHINode::PHINode(const PHINode &PN)
+ : Instruction(PN.getType(), Instruction::PHI,
+ allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ std::copy(PN.op_begin(), PN.op_end(), op_begin());
+ std::copy(PN.block_begin(), PN.block_end(), block_begin());
+ SubclassOptionalData = PN.SubclassOptionalData;
+}
+
+PHINode::~PHINode() {
+ dropHungoffUses();
+}
+
+Use *PHINode::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses of the incoming values, followed by a pointer
+ // (with bottom bit set) to the User, followed by the array of pointers to
+ // the incoming basic blocks.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+ + N * sizeof(BasicBlock*);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+// removeIncomingValue - Remove an incoming value. This is useful if a
+// predecessor basic block is deleted.
+Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
+ Value *Removed = getIncomingValue(Idx);
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+ // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+ std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
+
+ // Nuke the last value.
+ Op<-1>().set(0);
+ --NumOperands;
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(UndefValue::get(getType()));
+ eraseFromParent();
+ }
+ return Removed;
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, increasing the number of operands by 1.5 times.
+///
+void PHINode::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e + e / 2;
+ if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
+
+ Use *OldOps = op_begin();
+ BasicBlock **OldBlocks = block_begin();
+
+ ReservedSpace = NumOps;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ std::copy(OldOps, OldOps + e, op_begin());
+ std::copy(OldBlocks, OldBlocks + e, block_begin());
+
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+/// hasConstantValue - If the specified PHI node always merges together the same
+/// value, return the value, otherwise return null.
+Value *PHINode::hasConstantValue() const {
+ // Exploit the fact that phi nodes always have at least one entry.
+ Value *ConstantValue = getIncomingValue(0);
+ for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+ if (getIncomingValue(i) != ConstantValue)
+ return 0; // Incoming values not all the same.
+ return ConstantValue;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallInst Implementation
+//===----------------------------------------------------------------------===//
+
+CallInst::~CallInst() {
+}
+
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert((Args.size() == FTy->getNumParams() ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Calling a function with bad signature!");
+
+ for (unsigned i = 0; i != Args.size(); ++i)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Calling a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+void CallInst::init(Value *Func, const Twine &NameStr) {
+ assert(NumOperands == 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
+#endif
+
+ setName(NameStr);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(const CallInst &CI)
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
+ setAttributes(CI.getAttributes());
+ setTailCall(CI.isTailCall());
+ setCallingConv(CI.getCallingConv());
+
+ std::copy(CI.op_begin(), CI.op_end(), op_begin());
+ SubclassOptionalData = CI.SubclassOptionalData;
+}
+
+void CallInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void CallInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+bool CallInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
+
+/// IsConstantOne - Return true only if val is the constant int 1.
+static bool IsConstantOne(Value *val) {
+ assert(val && "IsConstantOne does not work with NULL val");
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Instruction *createMalloc(Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd, const Type *IntPtrTy,
+ const Type *AllocTy, Value *AllocSize,
+ Value *ArraySize, Function *MallocF,
+ const Twine &Name) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createMalloc needs either InsertBefore or InsertAtEnd");
+
+ // malloc(type) becomes:
+ // bitcast (i8* malloc(typeSize)) to type*
+ // malloc(type, arraySize) becomes:
+ // bitcast (i8 *malloc(typeSize*arraySize)) to type*
+ if (!ArraySize)
+ ArraySize = ConstantInt::get(IntPtrTy, 1);
+ else if (ArraySize->getType() != IntPtrTy) {
+ if (InsertBefore)
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
+ else
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
+ }
+
+ if (!IsConstantOne(ArraySize)) {
+ if (IsConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
+ Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
+ false /*ZExt*/);
+ // Malloc arg is constant product of type size and array size
+ AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
+ } else {
+ // Multiply type size by the array size...
+ if (InsertBefore)
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertBefore);
+ else
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertAtEnd);
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+ Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
+ // prototype malloc as "void *malloc(size_t)"
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ CallInst *MCall = NULL;
+ Instruction *Result = NULL;
+ if (InsertBefore) {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
+ Result = MCall;
+ if (Result->getType() != AllocPtrType)
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
+ } else {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
+ Result = MCall;
+ if (Result->getType() != AllocPtrType) {
+ InsertAtEnd->getInstList().push_back(MCall);
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name);
+ }
+ }
+ MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
+
+ return Result;
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
+ const Type *IntPtrTy, const Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function * MallocF,
+ const Twine &Name) {
+ return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
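+
+// Usage sketch (assumed caller: InsertPt is an existing instruction, Ctx an
+// LLVMContext, and the target is 64-bit so that IntPtrTy is i64); this
+// allocates an array of ten i32s and bitcasts the result to i32*:
+//
+//   Instruction *ArrMalloc =
+//       CallInst::CreateMalloc(InsertPt,
+//                              Type::getInt64Ty(Ctx),               // IntPtrTy
+//                              Type::getInt32Ty(Ctx),               // AllocTy
+//                              ConstantExpr::getSizeOf(Type::getInt32Ty(Ctx)),
+//                              ConstantInt::get(Type::getInt64Ty(Ctx), 10),
+//                              /*MallocF=*/0, "arr");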
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+/// Note: This function does not add the bitcast to the basic block; that is
+/// the responsibility of the caller.
+Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
+ const Type *IntPtrTy, const Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function *MallocF, const Twine &Name) {
+ return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
+
+static Instruction* createFree(Value* Source, Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createFree needs either InsertBefore or InsertAtEnd");
+ assert(Source->getType()->isPointerTy() &&
+ "Can not free something of nonpointer type!");
+
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+
+ const Type *VoidTy = Type::getVoidTy(M->getContext());
+ const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ // prototype free as "void free(void*)"
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+ CallInst* Result = NULL;
+ Value *PtrCast = Source;
+ if (InsertBefore) {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
+ Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
+ } else {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
+ Result = CallInst::Create(FreeFunc, PtrCast, "");
+ }
+ Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
+
+ return Result;
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ return createFree(Source, InsertBefore, NULL);
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+/// Note: This function does not add the call to the basic block; that is the
+/// responsibility of the caller.
+Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
+ Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ assert(FreeCall && "CreateFree did not create a CallInst");
+ return FreeCall;
+}
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
+ Op<-3>() = Fn;
+ Op<-2>() = IfNormal;
+ Op<-1>() = IfException;
+
+#ifndef NDEBUG
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
+
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Invoking a function with bad signature");
+
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Invoking a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+InvokeInst::InvokeInst(const InvokeInst &II)
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this)
+ - II.getNumOperands(),
+ II.getNumOperands()) {
+ setAttributes(II.getAttributes());
+ setCallingConv(II.getCallingConv());
+ std::copy(II.op_begin(), II.op_end(), op_begin());
+ SubclassOptionalData = II.SubclassOptionalData;
+}
+
+BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned InvokeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ return setSuccessor(idx, B);
+}
+
+bool InvokeInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
+
+void InvokeInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+ReturnInst::ReturnInst(const ReturnInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) -
+ RI.getNumOperands(),
+ RI.getNumOperands()) {
+ if (RI.getNumOperands())
+ Op<0>() = RI.Op<0>();
+ SubclassOptionalData = RI.SubclassOptionalData;
+}
+
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
+}
+
+unsigned ReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
+/// emit the vtable for the class in this translation unit.
+void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ReturnInst has no successors!");
+ return 0;
+}
+
+ReturnInst::~ReturnInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnwindInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnwindInst::UnwindInst(LLVMContext &Context, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertBefore) {
+}
+UnwindInst::UnwindInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
+ 0, 0, InsertAtEnd) {
+}
+
+
+unsigned UnwindInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnwindInst has no successors!");
+}
+
+BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnwindInst has no successors!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnreachableInst::UnreachableInst(LLVMContext &Context,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertBefore) {
+}
+UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertAtEnd) {
+}
+
+unsigned UnreachableInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnwindInst has no successors!");
+}
+
+BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnwindInst has no successors!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// BranchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void BranchInst::AssertOK() {
+ if (isConditional())
+ assert(getCondition()->getType()->isIntegerTy(1) &&
+ "May only branch on boolean predicates!");
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertBefore) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertAtEnd) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+
+BranchInst::BranchInst(const BranchInst &BI) :
+ TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
+ Op<-1>() = BI.Op<-1>();
+ if (BI.getNumOperands() != 1) {
+ assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
+ Op<-3>() = BI.Op<-3>();
+ Op<-2>() = BI.Op<-2>();
+ }
+ SubclassOptionalData = BI.SubclassOptionalData;
+}
+
+BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned BranchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AllocaInst Implementation
+//===----------------------------------------------------------------------===//
+
+static Value *getAISize(LLVMContext &Context, Value *Amt) {
+ if (!Amt)
+ Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into allocation size parameter! Use other ctor");
+ assert(Amt->getType()->isIntegerTy() &&
+ "Allocation array size is not an integer!");
+ }
+ return Amt;
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+// Out of line virtual method, so the vtable, etc has a home.
+AllocaInst::~AllocaInst() {
+}
+
+void AllocaInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData(Log2_32(Align) + 1);
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
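+
+// Encoding note, inferred from the code above: the alignment is stored as
+// Log2(Align) + 1 in the instruction's subclass data, so an unspecified
+// alignment (0) round-trips as 0 while, e.g., Align == 16 is stored as 5.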
+
+bool AllocaInst::isArrayAllocation() const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
+ return !CI->isOne();
+ return true;
+}
+
+Type *AllocaInst::getAllocatedType() const {
+ return getType()->getElementType();
+}
+
+/// isStaticAlloca - Return true if this alloca is in the entry block of the
+/// function and is a constant size. If so, the code generator will fold it
+/// into the prolog/epilog code, so it is basically free.
+bool AllocaInst::isStaticAlloca() const {
+ // Must be constant size.
+ if (!isa<ConstantInt>(getArraySize())) return false;
+
+ // Must be in the entry block.
+ const BasicBlock *Parent = getParent();
+ return Parent == &Parent->getParent()->front();
+}
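+
+// Examples (informal): an "alloca i32" emitted in the function's entry block
+// is static and is folded into the fixed stack frame, while an
+// "alloca i32, i32 %n" (run-time array size) or an alloca placed in any
+// later block is treated as dynamic.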
+
+//===----------------------------------------------------------------------===//
+// LoadInst Implementation
+//===----------------------------------------------------------------------===//
+
+void LoadInst::AssertOK() {
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type.");
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ Instruction *InsertBef)
+: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+void LoadInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ ((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// StoreInst Implementation
+//===----------------------------------------------------------------------===//
+
+void StoreInst::AssertOK() {
+ assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
+ assert(getOperand(1)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(0)->getType() ==
+ cast<PointerType>(getOperand(1)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+}
+
+
+StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+void StoreInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ ((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+static unsigned retrieveAddrSpace(const Value *Val) {
+ return cast<PointerType>(Val->getType())->getAddressSpace();
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
+ const Twine &Name) {
+ assert(NumOperands == 1+NumIdx && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+
+ for (unsigned i = 0; i != NumIdx; ++i)
+ OL[i+1] = Idx[i];
+
+ setName(Name);
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value *Idx, const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+ OL[1] = Idx;
+
+ setName(Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this)
+ - GEPI.getNumOperands(),
+ GEPI.getNumOperands()) {
+ Use *OL = OperandList;
+ Use *GEPIOL = GEPI.OperandList;
+ for (unsigned i = 0, E = NumOperands; i != E; ++i)
+ OL[i] = GEPIOL[i];
+ SubclassOptionalData = GEPI.SubclassOptionalData;
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const Twine &Name, Instruction *InBe)
+ : Instruction(PointerType::get(
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, InBe) {
+ init(Ptr, Idx, Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const Twine &Name, BasicBlock *IAE)
+ : Instruction(PointerType::get(
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)),
+ retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, IAE) {
+ init(Ptr, Idx, Name);
+}
+
+/// getIndexedType - Returns the type of the element that would be accessed with
+/// a gep instruction with the specified parameters.
+///
+/// The Idxs pointer should point to a contiguous piece of memory containing the
+/// indices, either as Value* or uint64_t.
+///
+/// A null type is returned if the indices are invalid for the specified
+/// pointer type.
+///
+template <typename IndexTy>
+static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
+ unsigned NumIdx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+ Type *Agg = PTy->getElementType();
+
+ // Handle the special case of an empty index set, which is always valid.
+ if (NumIdx == 0)
+ return Agg;
+
+ // If there is at least one index, the top level type must be sized, otherwise
+ // it cannot be 'stepped over'.
+ if (!Agg->isSized())
+ return 0;
+
+ unsigned CurIdx = 1;
+ for (; CurIdx != NumIdx; ++CurIdx) {
+ CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || CT->isPointerTy()) return 0;
+ IndexTy Index = Idxs[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+ }
+ return CurIdx == NumIdx ? Agg : 0;
+}
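+// Worked example: for %T = type { i32, [4 x i8] } and a pointer operand of
+// type %T*, the index list (i64 0, i32 1, i64 2) walks %T -> [4 x i8] -> i8,
+// so the indexed type is i8 and the resulting GEP produces an i8*. Indexing
+// any further (into the i8, which is not a composite type) returns null.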
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ uint64_t const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+
+ // Check the pointer index.
+ if (!PTy->indexValid(Idx)) return 0;
+
+ return PTy->getElementType();
+}
+
+
+/// hasAllZeroIndices - Return true if all of the indices of this GEP are
+/// zeros. If so, the result pointer and the first operand have the same
+/// value, just potentially different types.
+bool GetElementPtrInst::hasAllZeroIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
+ if (!CI->isZero()) return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
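+// Example: "getelementptr { i32, i32 }* %p, i32 0, i32 0" has all-zero
+// indices, so it computes the same address as %p, merely retyped as i32*.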
+
+/// hasAllConstantIndices - Return true if all of the indices of this GEP are
+/// constant integers. If so, the result pointer and the first operand have
+/// a constant offset between them.
+bool GetElementPtrInst::hasAllConstantIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ }
+ return true;
+}
+
+void GetElementPtrInst::setIsInBounds(bool B) {
+ cast<GEPOperator>(this)->setIsInBounds(B);
+}
+
+bool GetElementPtrInst::isInBounds() const {
+ return cast<GEPOperator>(this)->isInBounds();
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+ return false;
+ return true;
+}
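+// Example: "extractelement <4 x float> %v, i32 2" passes this check; a
+// non-vector first operand or an index whose type is not i32 does not.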
+
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+ const Value *Index) {
+ if (!Vec->getType()->isVectorTy())
+ return false; // First operand of insertelement must be vector type.
+
+ if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
+ return false; // Second operand of insertelement must be vector element type.
+
+ if (!Index->getType()->isIntegerTy(32))
+ return false; // Third operand of insertelement must be i32.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Implementation
+//===----------------------------------------------------------------------===//
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertBefore) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask) {
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
+ return false;
+
+ const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(VTy->getNumElements()*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ }
+ else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
+ return false;
+
+ return true;
+}
+
+/// getMaskValue - Return the index from the shuffle mask for the specified
+/// output result. This is either -1 if the element is undef or a number less
+/// than 2*numelements.
+int ShuffleVectorInst::getMaskValue(unsigned i) const {
+ const Constant *Mask = cast<Constant>(getOperand(2));
+ if (isa<UndefValue>(Mask)) return -1;
+ if (isa<ConstantAggregateZero>(Mask)) return 0;
+ const ConstantVector *MaskCV = cast<ConstantVector>(Mask);
+ assert(i < MaskCV->getNumOperands() && "Index out of range");
+
+ if (isa<UndefValue>(MaskCV->getOperand(i)))
+ return -1;
+ return cast<ConstantInt>(MaskCV->getOperand(i))->getZExtValue();
+}
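+// Example: for a shuffle of two <4 x i32> vectors with mask
+// <i32 0, i32 4, i32 undef, i32 7>, getMaskValue(1) returns 4 (element 0 of
+// the second input vector) and getMaskValue(2) returns -1 (undef lane).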
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index
+ // (other than weirdness with &*IdxBegin being invalid; see
+ // getelementptr's init routine for example). But there's no
+ // present need to support it.
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
+
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+ Val->getType() && "Inserted value must match indexed type!");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
+ : Instruction(IVI.getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this), 2),
+ Indices(IVI.Indices) {
+ Op<0>() = IVI.getOperand(0);
+ Op<1>() = IVI.getOperand(1);
+ SubclassOptionalData = IVI.SubclassOptionalData;
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index.
+ // But there's no present need to support it.
+ assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
+ : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
+ Indices(EVI.Indices) {
+ SubclassOptionalData = EVI.SubclassOptionalData;
+}
+
+// getIndexedType - Returns the type of the element that would be extracted
+// with an extractvalue instruction with the specified parameters.
+//
+// A null type is returned if the indices are invalid for the specified
+// aggregate type.
+//
+Type *ExtractValueInst::getIndexedType(const Type *Agg,
+ ArrayRef<unsigned> Idxs) {
+ for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
+ unsigned Index = Idxs[CurIdx];
+ // We can't use CompositeType::indexValid(Index) here.
+ // indexValid() always returns true for arrays because getelementptr allows
+ // out-of-bounds indices. Since we don't allow those for extractvalue and
+ // insertvalue we need to check array indexing manually.
+ // Since the only other types we can index into are struct types it's just
+ // as easy to check those manually as well.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (Index >= AT->getNumElements())
+ return 0;
+ } else if (const StructType *ST = dyn_cast<StructType>(Agg)) {
+ if (Index >= ST->getNumElements())
+ return 0;
+ } else {
+ // Not a valid type to index into.
+ return 0;
+ }
+
+ Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
+ }
+ return const_cast<Type*>(Agg);
+}
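+// Example: for the aggregate type { i32, [2 x float] }, the index list {1, 1}
+// yields float, whereas {1, 2} yields null because index 2 is out of range
+// for the two-element array (unlike getelementptr, out-of-bounds array
+// indices are rejected here).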
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertBefore) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+
+void BinaryOperator::init(BinaryOps iType) {
+ Value *LHS = getOperand(0), *RHS = getOperand(1);
+ (void)LHS; (void)RHS; // Silence warnings.
+ assert(LHS->getType() == RHS->getType() &&
+ "Binary operator operand types must match!");
+#ifndef NDEBUG
+ switch (iType) {
+ case Add: case Sub:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case FAdd: case FSub:
+ case FMul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case UDiv:
+ case SDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UDIV");
+ break;
+ case FDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FDIV");
+ break;
+ case URem:
+ case SRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UREM");
+ break;
+ case FRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FREM");
+ break;
+ case Shl:
+ case LShr:
+ case AShr:
+ assert(getType() == LHS->getType() &&
+ "Shift operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a shift operation on a non-integral type!");
+ break;
+ case And: case Or:
+ case Xor:
+ assert(getType() == LHS->getType() &&
+ "Logical operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S1->getType() == S2->getType() &&
+ "Cannot create binary operator with two operands of differing type!");
+ return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ BinaryOperator *Res = Create(Op, S1, S2, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Constant *C;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ C = Constant::getAllOnesValue(PTy->getElementType());
+ C = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), C));
+ } else {
+ C = Constant::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, C,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Constant *AllOnes;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ // Create a vector of all ones values.
+ Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
+ AllOnes = ConstantVector::get(
+ std::vector<Constant*>(PTy->getNumElements(), Elt));
+ } else {
+ AllOnes = Constant::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, AllOnes,
+ Op->getType(), Name, InsertAtEnd);
+}
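+// Example: CreateNot on an i32 value produces "xor %x, -1"; on a <2 x i32>
+// value it xors with the all-ones vector <i32 -1, i32 -1>.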
+
+
+// isConstantAllOnes - Helper function for several functions below
+static inline bool isConstantAllOnes(const Value *V) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->isAllOnesValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return CV->isAllOnesValue();
+ return false;
+}
+
+bool BinaryOperator::isNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::Sub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isFNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::FSub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isNot(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ return (Bop->getOpcode() == Instruction::Xor &&
+ (isConstantAllOnes(Bop->getOperand(1)) ||
+ isConstantAllOnes(Bop->getOperand(0))));
+ return false;
+}
+
+Value *BinaryOperator::getNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
+ return getNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+ return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getNotArgument(Value *BinOp) {
+ assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
+ BinaryOperator *BO = cast<BinaryOperator>(BinOp);
+ Value *Op0 = BO->getOperand(0);
+ Value *Op1 = BO->getOperand(1);
+ if (isConstantAllOnes(Op0)) return Op1;
+
+ assert(isConstantAllOnes(Op1));
+ return Op0;
+}
+
+const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
+ return getNotArgument(const_cast<Value*>(BinOp));
+}
+
+
+// swapOperands - Exchange the two operands of this instruction. This is safe
+// to call on any binary instruction and does not modify its semantics: if the
+// operation is not commutative, the operands are left untouched and true is
+// returned to signal failure.
+//
+bool BinaryOperator::swapOperands() {
+ if (!isCommutative())
+ return true; // Can't commute operands
+ Op<0>().swap(Op<1>());
+ return false;
+}
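+// Example: on "add %a, %b", swapOperands() exchanges %a and %b and returns
+// false; on "sub %a, %b" it returns true and leaves the operands alone,
+// because subtraction is not commutative.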
+
+void BinaryOperator::setHasNoUnsignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
+}
+
+void BinaryOperator::setHasNoSignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
+}
+
+void BinaryOperator::setIsExact(bool b) {
+ cast<PossiblyExactOperator>(this)->setIsExact(b);
+}
+
+bool BinaryOperator::hasNoUnsignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
+}
+
+bool BinaryOperator::hasNoSignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
+}
+
+bool BinaryOperator::isExact() const {
+ return cast<PossiblyExactOperator>(this)->isExact();
+}
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+// Just determine if this cast only deals with integral->integral conversion.
+bool CastInst::isIntegerCast() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ return true;
+ case Instruction::BitCast:
+ return getOperand(0)->getType()->isIntegerTy() &&
+ getType()->isIntegerTy();
+ }
+}
+
+bool CastInst::isLosslessCast() const {
+ // Only BitCast can be lossless; exit fast if we're not BitCast.
+ if (getOpcode() != Instruction::BitCast)
+ return false;
+
+ // Identity cast is always lossless
+ const Type* SrcTy = getOperand(0)->getType();
+ const Type* DstTy = getType();
+ if (SrcTy == DstTy)
+ return true;
+
+ // Pointer to pointer is always lossless.
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
+ return false; // Other types have no identity values
+}
+
+/// This function determines if the CastInst does not require any bits to be
+/// changed in order to effect the cast. Essentially, it identifies cases where
+/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// example, the following are all no-op casts:
+/// # bitcast i32* %x to i8*
+/// # bitcast <2 x i32> %x to <4 x i16>
+/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+ const Type *SrcTy,
+ const Type *DestTy,
+ const Type *IntPtrTy) {
+ switch (Opcode) {
+ default:
+ assert(!"Invalid CastOp");
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false; // These always modify bits
+ case Instruction::BitCast:
+ return true; // BitCast never modifies bits.
+ case Instruction::PtrToInt:
+ return IntPtrTy->getScalarSizeInBits() ==
+ DestTy->getScalarSizeInBits();
+ case Instruction::IntToPtr:
+ return IntPtrTy->getScalarSizeInBits() ==
+ SrcTy->getScalarSizeInBits();
+ }
+}
+
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
+/// This function determines if a pair of casts can be eliminated and what
+/// opcode should be used in the elimination. This assumes that there are two
+/// instructions like this:
+/// * %F = firstOpcode SrcTy %x to MidTy
+/// * %S = secondOpcode MidTy %F to DstTy
+/// The function returns a resultOpcode so these two casts can be replaced with:
+/// * %Replacement = resultOpcode SrcTy %x to DstTy
+/// If no such cast is permitted, the function returns 0.
+unsigned CastInst::isEliminableCastPair(
+ Instruction::CastOps firstOp, Instruction::CastOps secondOp,
+ const Type *SrcTy, const Type *MidTy, const Type *DstTy, const Type *IntPtrTy)
+{
+ // Define the 144 possibilities for these two cast instructions. The values
+ // in this matrix determine what to do in a given situation and select the
+ // case in the switch below. The rows correspond to firstOp, the columns
+ // correspond to secondOp. In looking at the table below, keep in mind
+ // the following cast properties:
+ //
+ // Size Compare Source Destination
+ // Operator Src ? Size Type Sign Type Sign
+ // -------- ------------ ------------------- ---------------------
+ // TRUNC > Integer Any Integral Any
+ // ZEXT < Integral Unsigned Integer Any
+ // SEXT < Integral Signed Integer Any
+ // FPTOUI n/a FloatPt n/a Integral Unsigned
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
+ // PTRTOINT n/a Pointer n/a Integral Unsigned
+ // INTTOPTR n/a Integral Unsigned Pointer n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
+ //
+ // NOTE: some transforms are safe, but we consider them to be non-profitable.
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
+ // of the produced value (we no longer know the top-part is all zeros).
+ // Further this conversion is often much more expensive for typical hardware,
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // same reason.
+ const unsigned numCastOps =
+ Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ static const uint8_t CastResults[numCastOps][numCastOps] = {
+ // T F F U S F F P I B -+
+ // R Z S P P I I T P 2 N T |
+ // U E E 2 2 2 2 R E I T C +- secondOp
+ // N X X U S F F N X N 2 V |
+ // C T T I I P P C T T P T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ };
+
+ // If either of the casts is a bitcast between a scalar and a vector type,
+ // disallow the merging.
+ if ((firstOp == Instruction::BitCast &&
+ isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (secondOp == Instruction::BitCast &&
+ isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ return 0; // Disallowed
+
+ int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
+ [secondOp-Instruction::CastOpsBegin];
+ switch (ElimCase) {
+ case 0:
+ // categorically disallowed
+ return 0;
+ case 1:
+ // allowed, use first cast's opcode
+ return firstOp;
+ case 2:
+ // allowed, use second cast's opcode
+ return secondOp;
+ case 3:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is integer and we are not converting between a vector and a
+ // non-vector type.
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
+ return firstOp;
+ return 0;
+ case 4:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is floating point.
+ if (DstTy->isFloatingPointTy())
+ return firstOp;
+ return 0;
+ case 5:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is an integer.
+ if (SrcTy->isIntegerTy())
+ return secondOp;
+ return 0;
+ case 6:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is a floating point.
+ if (SrcTy->isFloatingPointTy())
+ return secondOp;
+ return 0;
+ case 7: {
+ // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ if (!IntPtrTy)
+ return 0;
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
+ if (MidSize >= PtrSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 8: {
+ // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
+ // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
+ // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize == DstSize)
+ return Instruction::BitCast;
+ else if (SrcSize < DstSize)
+ return firstOp;
+ return secondOp;
+ }
+ case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ return Instruction::ZExt;
+ case 10:
+ // fpext followed by fptrunc is allowed if the destination type is the
+ // same as the original source type, in which case it's just a bitcast.
+ if (SrcTy == DstTy)
+ return Instruction::BitCast;
+ return 0; // If the types are not the same we can't eliminate it.
+ case 11:
+ // bitcast followed by ptrtoint is allowed as long as the bitcast
+ // is a pointer to pointer cast.
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
+ return secondOp;
+ return 0;
+ case 12:
+ // inttoptr, bitcast -> inttoptr if the bitcast is a ptr-to-ptr cast
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
+ return firstOp;
+ return 0;
+ case 13: {
+ // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ if (!IntPtrTy)
+ return 0;
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize <= PtrSize && SrcSize == DstSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 99:
+ // cast combination can't happen (error in input). This is for all cases
+ // where the MidTy is not the same for the two cast instructions.
+ assert(!"Invalid Cast Combination");
+ return 0;
+ default:
+ assert(!"Error in CastResults table!!!");
+ return 0;
+ }
+ return 0;
+}
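+// Worked example: "zext i16 %x to i32" followed by "trunc i32 %y to i8" hits
+// case 8 with SrcSize(16) > DstSize(8), so the pair folds to a single
+// "trunc i16 %x to i8"; if the final destination were i16 instead, the sizes
+// would match and the pair would fold to a bitcast. By contrast, the FPToUI
+// row holds 0 in the ZExt column, so fptoui+zext is never merged, matching
+// the profitability note above.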
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const Twine &Name, Instruction *InsertBefore) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+/// @brief Create a BitCast or a PtrToInt cast instruction
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid integer cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
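+// Example: casting an i8 value to i32 selects SExt when isSigned is true and
+// ZExt otherwise; i64 to i32 selects Trunc; i32 to i32 degenerates to a
+// BitCast.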
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
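+// Example: float to double selects FPExt, double to float selects FPTrunc,
+// and equal-width source and destination types fall back to BitCast.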
+
+// Check whether it is valid to call getCastOpcode for these types.
+// This routine must be kept in sync with getCastOpcode.
+bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
+ if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
+ return false;
+
+ if (SrcTy == DestTy)
+ return true;
+
+ if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Valid if casting the elements is valid.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isVectorTy()) { // Casting to vector
+ return DestBits == SrcBits;
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
+ return true;
+ } else if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ return DestBits == SrcBits; // 64-bit vector to MMX
+ } else {
+ return false;
+ }
+ } else { // Casting to something else
+ return false;
+ }
+}
+
+// Provide a way to get a "cast" where the cast opcode is inferred from the
+// types and size of the operand. This, basically, is a parallel of the
+// logic in the castIsValid function below. The following should always hold:
+// castIsValid(getCastOpcode(Val, ..., Ty, ...), Val, Ty) == true.
+// In other words, this produces a "correct"
+// casting opcode for the arguments passed to it.
+// This routine must be kept in sync with isCastable.
+Instruction::CastOps
+CastInst::getCastOpcode(
+ const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
+ const Type *SrcTy = Src->getType();
+
+ assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
+ "Only first class types are castable!");
+
+ if (SrcTy == DestTy)
+ return BitCast;
+
+ if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Find the appropriate opcode based on the
+ // element types.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (DestBits < SrcBits)
+ return Trunc; // int -> smaller int
+ else if (DestBits > SrcBits) { // its an extension
+ if (SrcIsSigned)
+ return SExt; // signed -> SEXT
+ else
+ return ZExt; // unsigned -> ZEXT
+ } else {
+ return BitCast; // Same size, No-op cast
+ }
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestIsSigned)
+ return FPToSI; // FP -> sint
+ else
+ return FPToUI; // FP -> uint
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to integer of different width");
+ return BitCast; // Same size, no-op cast
+ } else {
+ assert(SrcTy->isPointerTy() &&
+ "Casting from a value that is not first-class type");
+ return PtrToInt; // ptr -> int
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcIsSigned)
+ return SIToFP; // sint -> FP
+ else
+ return UIToFP; // uint -> FP
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestBits < SrcBits) {
+ return FPTrunc; // FP -> smaller FP
+ } else if (DestBits > SrcBits) {
+ return FPExt; // FP -> larger FP
+ } else {
+ return BitCast; // same size, no-op cast
+ }
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to floating point of different width");
+ return BitCast; // same size, no-op cast
+ } else {
+ llvm_unreachable("Casting pointer or non-first class to float");
+ }
+ } else if (DestTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Illegal cast to vector (wrong type or size)");
+ return BitCast;
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
+ return BitCast; // ptr -> ptr
+ } else if (SrcTy->isIntegerTy()) {
+ return IntToPtr; // int -> ptr
+ } else {
+ assert(!"Casting pointer to other than pointer or int");
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
+ return BitCast; // 64-bit vector to MMX
+ } else {
+ assert(!"Illegal cast to X86_MMX");
+ }
+ } else {
+ assert(!"Casting to type that is not first-class");
+ }
+
+ // If we fall through to here we probably hit an assertion cast above
+ // and assertions are not turned on. Anything we return is an error, so
+ // BitCast is as good a choice as any.
+ return BitCast;
+}
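+// Example: an i8* source with an i64 destination yields PtrToInt; an i32
+// source with SrcIsSigned set and a double destination yields SIToFP; and
+// <2 x i32> -> <2 x float> is resolved element-wise, giving SIToFP or UIToFP
+// depending on SrcIsSigned.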
+
+//===----------------------------------------------------------------------===//
+// CastInst SubClass Constructors
+//===----------------------------------------------------------------------===//
+
+/// Check that the construction parameters for a CastInst are correct. This
+/// could be broken out into the separate constructors but it is useful to have
+/// it in one place and to eliminate the redundant code for getting the sizes
+/// of the types involved.
+bool
+CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
+
+ // Check for type sanity on the arguments
+ const Type *SrcTy = S->getType();
+ if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
+ SrcTy->isAggregateType() || DstTy->isAggregateType())
+ return false;
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
+
+ // If these are vector types, get the lengths of the vectors (using zero for
+ // scalar types means that checking that vector lengths match also checks that
+ // scalars are not being converted to vectors or vectors to scalars).
+ unsigned SrcLength = SrcTy->isVectorTy() ?
+ cast<VectorType>(SrcTy)->getNumElements() : 0;
+ unsigned DstLength = DstTy->isVectorTy() ?
+ cast<VectorType>(DstTy)->getNumElements() : 0;
+
+ // Switch on the opcode provided
+ switch (op) {
+ default: return false; // This is an input error
+ case Instruction::Trunc:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::ZExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::SExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::FPTrunc:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::FPExt:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::PtrToInt:
+ return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ case Instruction::IntToPtr:
+ return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ case Instruction::BitCast:
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
+ return false;
+
+ // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
+ // these cases, the cast is okay if the source and destination bit widths
+ // are identical.
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+ }
+}
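+// Example: "trunc <4 x i32> to <4 x i16>" is accepted (matching vector
+// lengths, 32 > 16 bits), "trunc <4 x i32> to <2 x i16>" is rejected because
+// the vector lengths differ, and "bitcast i32* to i64" is rejected because a
+// pointer may only be bitcast to another pointer.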
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+//===----------------------------------------------------------------------===//
+// CmpInst Classes
+//===----------------------------------------------------------------------===//
+
+void CmpInst::Anchor() const {}
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate,
+ Value *S1, Value *S2,
+ const Twine &Name, Instruction *InsertBefore) {
+ if (Op == Instruction::ICmp) {
+ if (InsertBefore)
+ return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new ICmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+
+ if (InsertBefore)
+ return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new FCmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+ return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
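+
+// A minimal usage sketch (illustrative only; LHS, RHS, X, Y, BB and IP are
+// assumed to be supplied by the caller):
+//
+//   // Signed less-than, appended to the end of basic block BB:
+//   CmpInst *Lt = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_SLT,
+//                                 LHS, RHS, "lt", BB);
+//   // Ordered floating-point equality, inserted before instruction IP:
+//   CmpInst *Eq = CmpInst::Create(Instruction::FCmp, FCmpInst::FCMP_OEQ,
+//                                 X, Y, "eq", IP);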
+
+void CmpInst::swapOperands() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ IC->swapOperands();
+ else
+ cast<FCmpInst>(this)->swapOperands();
+}
+
+bool CmpInst::isCommutative() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isCommutative();
+ return cast<FCmpInst>(this)->isCommutative();
+}
+
+bool CmpInst::isEquality() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isEquality();
+ return cast<FCmpInst>(this)->isEquality();
+}
+
+
+CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: return ICMP_NE;
+ case ICMP_NE: return ICMP_EQ;
+ case ICMP_UGT: return ICMP_ULE;
+ case ICMP_ULT: return ICMP_UGE;
+ case ICMP_UGE: return ICMP_ULT;
+ case ICMP_ULE: return ICMP_UGT;
+ case ICMP_SGT: return ICMP_SLE;
+ case ICMP_SLT: return ICMP_SGE;
+ case ICMP_SGE: return ICMP_SLT;
+ case ICMP_SLE: return ICMP_SGT;
+
+ case FCMP_OEQ: return FCMP_UNE;
+ case FCMP_ONE: return FCMP_UEQ;
+ case FCMP_OGT: return FCMP_ULE;
+ case FCMP_OLT: return FCMP_UGE;
+ case FCMP_OGE: return FCMP_ULT;
+ case FCMP_OLE: return FCMP_UGT;
+ case FCMP_UEQ: return FCMP_ONE;
+ case FCMP_UNE: return FCMP_OEQ;
+ case FCMP_UGT: return FCMP_OLE;
+ case FCMP_ULT: return FCMP_OGE;
+ case FCMP_UGE: return FCMP_OLT;
+ case FCMP_ULE: return FCMP_OGT;
+ case FCMP_ORD: return FCMP_UNO;
+ case FCMP_UNO: return FCMP_ORD;
+ case FCMP_TRUE: return FCMP_FALSE;
+ case FCMP_FALSE: return FCMP_TRUE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return pred;
+ case ICMP_UGT: return ICMP_SGT;
+ case ICMP_ULT: return ICMP_SLT;
+ case ICMP_UGE: return ICMP_SGE;
+ case ICMP_ULE: return ICMP_SLE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ return pred;
+ case ICMP_SGT: return ICMP_UGT;
+ case ICMP_SLT: return ICMP_ULT;
+ case ICMP_SGE: return ICMP_UGE;
+ case ICMP_SLE: return ICMP_ULE;
+ }
+}
+
+/// makeConstantRange - Return the ConstantRange of values X for which the
+/// comparison (X pred C) is true.
+///
+ConstantRange
+ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
+ APInt Lower(C);
+ APInt Upper(C);
+ uint32_t BitWidth = C.getBitWidth();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
+ case ICmpInst::ICMP_EQ: Upper++; break;
+ case ICmpInst::ICMP_NE: Lower++; break;
+ case ICmpInst::ICMP_ULT:
+ Lower = APInt::getMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SLT:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_UGT:
+ Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SGT:
+ Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_ULE:
+ Lower = APInt::getMinValue(BitWidth); Upper++;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SLE:
+ Lower = APInt::getSignedMinValue(BitWidth); Upper++;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_UGE:
+ Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SGE:
+ Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ }
+ return ConstantRange(Lower, Upper);
+}
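+
+// Worked example (illustrative, assuming an 8-bit constant C == 10):
+//   ICMP_ULT  ->  [0, 10)      unsigned 0..9
+//   ICMP_SGT  ->  [11, -128)   signed 11..127 (wrapped range)
+//   ICMP_NE   ->  [11, 10)     every value except 10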
+
+CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ return pred;
+ case ICMP_SGT: return ICMP_SLT;
+ case ICMP_SLT: return ICMP_SGT;
+ case ICMP_SGE: return ICMP_SLE;
+ case ICMP_SLE: return ICMP_SGE;
+ case ICMP_UGT: return ICMP_ULT;
+ case ICMP_ULT: return ICMP_UGT;
+ case ICMP_UGE: return ICMP_ULE;
+ case ICMP_ULE: return ICMP_UGE;
+
+ case FCMP_FALSE: case FCMP_TRUE:
+ case FCMP_OEQ: case FCMP_ONE:
+ case FCMP_UEQ: case FCMP_UNE:
+ case FCMP_ORD: case FCMP_UNO:
+ return pred;
+ case FCMP_OGT: return FCMP_OLT;
+ case FCMP_OLT: return FCMP_OGT;
+ case FCMP_OGE: return FCMP_OLE;
+ case FCMP_OLE: return FCMP_OGE;
+ case FCMP_UGT: return FCMP_ULT;
+ case FCMP_ULT: return FCMP_UGT;
+ case FCMP_UGE: return FCMP_ULE;
+ case FCMP_ULE: return FCMP_UGE;
+ }
+}
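+
+// Note the difference between the inverse and swapped forms (for example):
+//   getInversePredicate(ICMP_SLT) == ICMP_SGE   // !(a < b)  <=>  a >= b
+//   getSwappedPredicate(ICMP_SLT) == ICMP_SGT   //  (a < b)  <=>  b > a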
+
+bool CmpInst::isUnsigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return true;
+ }
+}
+
+bool CmpInst::isSigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return true;
+ }
+}
+
+bool CmpInst::isOrdered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ORD: return true;
+ }
+}
+
+bool CmpInst::isUnordered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UNO: return true;
+ }
+}
+
+bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ default: return false;
+ case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
+ case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
+ }
+}
+
+bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
+ case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
+ default: return false;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+ assert(Value && Default && NumReserved);
+ ReservedSpace = NumReserved;
+ NumOperands = 2;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Value;
+ OperandList[1] = Default;
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor can also autoinsert before another instruction.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertBefore) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor also autoinserts at the end of the specified BasicBlock.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertAtEnd) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+SwitchInst::SwitchInst(const SwitchInst &SI)
+ : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+ NumOperands = SI.getNumOperands();
+ Use *OL = OperandList, *InOL = SI.OperandList;
+ for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
+ OL[i] = InOL[i];
+ OL[i+1] = InOL[i+1];
+ }
+ SubclassOptionalData = SI.SubclassOptionalData;
+}
+
+SwitchInst::~SwitchInst() {
+ dropHungoffUses();
+}
+
+
+/// addCase - Add an entry to the switch instruction...
+///
+void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+2 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+2;
+ OperandList[OpNo] = OnVal;
+ OperandList[OpNo+1] = Dest;
+}
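+
+// A minimal usage sketch (illustrative; Cond, DefaultBB, CaseBB, BB and Ctx
+// are assumed to exist, and SwitchInst::Create is declared in Instructions.h):
+//
+//   SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, /*NumCases=*/1, BB);
+//   SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 42), CaseBB);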
+
+/// removeCase - This method removes the specified successor from the switch
+/// instruction. Note that this cannot be used to remove the default
+/// destination (successor #0).
+///
+void SwitchInst::removeCase(unsigned idx) {
+ assert(idx != 0 && "Cannot remove the default case!");
+ assert(idx*2 < getNumOperands() && "Successor index out of range!!!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Overwrite this case with the end of the list.
+ if ((idx + 1) * 2 != NumOps) {
+ OL[idx * 2] = OL[NumOps - 2];
+ OL[idx * 2 + 1] = OL[NumOps - 1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+ NumOperands = NumOps-2;
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, tripling the number of reserved operand slots.
+///
+void SwitchInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*3;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i) {
+ NewOps[i] = OldOps[i];
+ }
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+
+BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned SwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void IndirectBrInst::init(Value *Address, unsigned NumDests) {
+ assert(Address && Address->getType()->isPointerTy() &&
+ "Address of indirectbr must be a pointer");
+ ReservedSpace = 1+NumDests;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Address;
+}
+
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, doubling the number of reserved operand slots.
+///
+void IndirectBrInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*2;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ Instruction *InsertBefore)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertBefore) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertAtEnd) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ allocHungoffUses(IBI.getNumOperands()),
+ IBI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = IBI.OperandList;
+ for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
+ OL[i] = InOL[i];
+ SubclassOptionalData = IBI.SubclassOptionalData;
+}
+
+IndirectBrInst::~IndirectBrInst() {
+ dropHungoffUses();
+}
+
+/// addDestination - Add a destination.
+///
+void IndirectBrInst::addDestination(BasicBlock *DestBB) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+1 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+1;
+ OperandList[OpNo] = DestBB;
+}
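+
+// A minimal usage sketch (illustrative; Address is assumed to be a pointer
+// value, Dest1/Dest2 existing blocks, and IndirectBrInst::Create is declared
+// in Instructions.h):
+//
+//   IndirectBrInst *IBI = IndirectBrInst::Create(Address, /*NumDests=*/2, BB);
+//   IBI->addDestination(Dest1);
+//   IBI->addDestination(Dest2);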
+
+/// removeDestination - This method removes the specified successor from the
+/// indirectbr instruction.
+void IndirectBrInst::removeDestination(unsigned idx) {
+ assert(idx < getNumOperands()-1 && "Successor index out of range!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Replace this value with the last one.
+ OL[idx+1] = OL[NumOps-1];
+
+ // Nuke the last value.
+ OL[NumOps-1].set(0);
+ NumOperands = NumOps-1;
+}
+
+BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned IndirectBrInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// clone_impl() implementations
+//===----------------------------------------------------------------------===//
+
+// Define these methods here so vtables don't get emitted into every translation
+// unit that uses these classes.
+
+GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+ return new (getNumOperands()) GetElementPtrInst(*this);
+}
+
+BinaryOperator *BinaryOperator::clone_impl() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
+}
+
+FCmpInst* FCmpInst::clone_impl() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ICmpInst* ICmpInst::clone_impl() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ExtractValueInst *ExtractValueInst::clone_impl() const {
+ return new ExtractValueInst(*this);
+}
+
+InsertValueInst *InsertValueInst::clone_impl() const {
+ return new InsertValueInst(*this);
+}
+
+AllocaInst *AllocaInst::clone_impl() const {
+ return new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+}
+
+LoadInst *LoadInst::clone_impl() const {
+ return new LoadInst(getOperand(0),
+ Twine(), isVolatile(),
+ getAlignment());
+}
+
+StoreInst *StoreInst::clone_impl() const {
+ return new StoreInst(getOperand(0), getOperand(1),
+ isVolatile(), getAlignment());
+}
+
+TruncInst *TruncInst::clone_impl() const {
+ return new TruncInst(getOperand(0), getType());
+}
+
+ZExtInst *ZExtInst::clone_impl() const {
+ return new ZExtInst(getOperand(0), getType());
+}
+
+SExtInst *SExtInst::clone_impl() const {
+ return new SExtInst(getOperand(0), getType());
+}
+
+FPTruncInst *FPTruncInst::clone_impl() const {
+ return new FPTruncInst(getOperand(0), getType());
+}
+
+FPExtInst *FPExtInst::clone_impl() const {
+ return new FPExtInst(getOperand(0), getType());
+}
+
+UIToFPInst *UIToFPInst::clone_impl() const {
+ return new UIToFPInst(getOperand(0), getType());
+}
+
+SIToFPInst *SIToFPInst::clone_impl() const {
+ return new SIToFPInst(getOperand(0), getType());
+}
+
+FPToUIInst *FPToUIInst::clone_impl() const {
+ return new FPToUIInst(getOperand(0), getType());
+}
+
+FPToSIInst *FPToSIInst::clone_impl() const {
+ return new FPToSIInst(getOperand(0), getType());
+}
+
+PtrToIntInst *PtrToIntInst::clone_impl() const {
+ return new PtrToIntInst(getOperand(0), getType());
+}
+
+IntToPtrInst *IntToPtrInst::clone_impl() const {
+ return new IntToPtrInst(getOperand(0), getType());
+}
+
+BitCastInst *BitCastInst::clone_impl() const {
+ return new BitCastInst(getOperand(0), getType());
+}
+
+CallInst *CallInst::clone_impl() const {
+ return new(getNumOperands()) CallInst(*this);
+}
+
+SelectInst *SelectInst::clone_impl() const {
+ return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+VAArgInst *VAArgInst::clone_impl() const {
+ return new VAArgInst(getOperand(0), getType());
+}
+
+ExtractElementInst *ExtractElementInst::clone_impl() const {
+ return ExtractElementInst::Create(getOperand(0), getOperand(1));
+}
+
+InsertElementInst *InsertElementInst::clone_impl() const {
+ return InsertElementInst::Create(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+}
+
+ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ return new ShuffleVectorInst(getOperand(0),
+ getOperand(1),
+ getOperand(2));
+}
+
+PHINode *PHINode::clone_impl() const {
+ return new PHINode(*this);
+}
+
+ReturnInst *ReturnInst::clone_impl() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+
+BranchInst *BranchInst::clone_impl() const {
+ return new(getNumOperands()) BranchInst(*this);
+}
+
+SwitchInst *SwitchInst::clone_impl() const {
+ return new SwitchInst(*this);
+}
+
+IndirectBrInst *IndirectBrInst::clone_impl() const {
+ return new IndirectBrInst(*this);
+}
+
+
+InvokeInst *InvokeInst::clone_impl() const {
+ return new(getNumOperands()) InvokeInst(*this);
+}
+
+UnwindInst *UnwindInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnwindInst(Context);
+}
+
+UnreachableInst *UnreachableInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnreachableInst(Context);
+}
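+
+// These hooks are reached through the public Instruction::clone() entry
+// point; a minimal sketch (I and BB assumed to exist; the clone is unnamed
+// and parentless until the caller inserts it):
+//
+//   Instruction *Copy = I->clone();
+//   BB->getInstList().push_back(Copy);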
diff --git a/contrib/llvm/lib/VMCore/IntrinsicInst.cpp b/contrib/llvm/lib/VMCore/IntrinsicInst.cpp
new file mode 100644
index 000000000000..ac8ec2086b18
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/IntrinsicInst.cpp
@@ -0,0 +1,73 @@
+//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods that make it really easy to deal with intrinsic
+// functions.
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+// In some cases, arguments to intrinsics need to be generic and are given the
+// type { }* (pointer to an empty struct). To access the real item of interest,
+// the cast instruction needs to be stripped away.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Metadata.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+///
+
+static Value *CastOperand(Value *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->isCast())
+ return CE->getOperand(0);
+ return NULL;
+}
+
+Value *DbgInfoIntrinsic::StripCast(Value *C) {
+ if (Value *CO = CastOperand(C)) {
+ C = StripCast(CO);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasInitializer())
+ if (Value *CO = CastOperand(GV->getInitializer()))
+ C = StripCast(CO);
+ }
+ return dyn_cast<GlobalVariable>(C);
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+///
+
+Value *DbgDeclareInst::getAddress() const {
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
+ return MD->getOperand(0);
+ else
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgValueInst - This represents the llvm.dbg.value instruction.
+///
+
+const Value *DbgValueInst::getValue() const {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
+
+Value *DbgValueInst::getValue() {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContext.cpp b/contrib/llvm/lib/VMCore/LLVMContext.cpp
new file mode 100644
index 000000000000..ebd1e0aa1b0f
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContext.cpp
@@ -0,0 +1,147 @@
+//===-- LLVMContext.cpp - Implement LLVMContext -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LLVMContext, as a wrapper around the opaque
+// class LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Constants.h"
+#include "llvm/Instruction.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/SourceMgr.h"
+#include "LLVMContextImpl.h"
+#include <cctype>
+using namespace llvm;
+
+static ManagedStatic<LLVMContext> GlobalContext;
+
+LLVMContext& llvm::getGlobalContext() {
+ return *GlobalContext;
+}
+
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
+ // Create the fixed metadata kinds. This is done in the same order as the
+ // MD_* enum values so that they correspond.
+
+ // Create the 'dbg' metadata kind.
+ unsigned DbgID = getMDKindID("dbg");
+ assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+ // Create the 'tbaa' metadata kind.
+ unsigned TBAAID = getMDKindID("tbaa");
+ assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+ // Create the 'prof' metadata kind.
+ unsigned ProfID = getMDKindID("prof");
+ assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+}
+LLVMContext::~LLVMContext() { delete pImpl; }
+
+void LLVMContext::addModule(Module *M) {
+ pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+ pImpl->OwnedModules.erase(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Recoverable Backend Errors
+//===----------------------------------------------------------------------===//
+
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext) {
+ pImpl->InlineAsmDiagHandler = DiagHandler;
+ pImpl->InlineAsmDiagContext = DiagContext;
+}
+
+/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
+/// setInlineAsmDiagnosticHandler.
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
+ return pImpl->InlineAsmDiagHandler;
+}
+
+/// getInlineAsmDiagnosticContext - Return the diagnostic context set by
+/// setInlineAsmDiagnosticHandler.
+void *LLVMContext::getInlineAsmDiagnosticContext() const {
+ return pImpl->InlineAsmDiagContext;
+}
+
+void LLVMContext::emitError(StringRef ErrorStr) {
+ emitError(0U, ErrorStr);
+}
+
+void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
+ unsigned LocCookie = 0;
+ if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+ LocCookie = CI->getZExtValue();
+ }
+ return emitError(LocCookie, ErrorStr);
+}
+
+void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
+ // If there is no error handler installed, just print the error and exit.
+ if (pImpl->InlineAsmDiagHandler == 0) {
+ errs() << "error: " << ErrorStr << "\n";
+ exit(1);
+ }
+
+ // If we do have an error handler, we can report the error and keep going.
+ SMDiagnostic Diag("", "error: " + ErrorStr.str());
+
+ pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
+}
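+
+// A minimal handler sketch (illustrative; the signature follows
+// InlineAsmDiagHandlerTy):
+//
+//   static void handleAsmDiag(const SMDiagnostic &D, void *Ctx, unsigned Loc) {
+//     // record or report D; Loc is the !srcloc cookie passed in above.
+//   }
+//   ...
+//   Context.setInlineAsmDiagnosticHandler(handleAsmDiag, /*DiagContext=*/0);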
+
+//===----------------------------------------------------------------------===//
+// Metadata Kind Uniquing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+/// isValidName - Return true if Name is a valid custom metadata kind name.
+static bool isValidName(StringRef MDName) {
+ if (MDName.empty())
+ return false;
+
+ if (!std::isalpha(MDName[0]))
+ return false;
+
+ for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
+ ++I) {
+ if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
+ return false;
+ }
+ return true;
+}
+#endif
+
+/// getMDKindID - Return a unique ID for the specified metadata kind.
+unsigned LLVMContext::getMDKindID(StringRef Name) const {
+ assert(isValidName(Name) && "Invalid MDNode name");
+
+ // If this is new, assign it its ID.
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
+}
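+
+// A minimal usage sketch (illustrative; Ctx, an instruction I and an MDNode N
+// are assumed, and "my.note" is a hypothetical custom kind name):
+//
+//   unsigned KindID = Ctx.getMDKindID("my.note");
+//   I->setMetadata(KindID, N);              // attach
+//   MDNode *Back = I->getMetadata(KindID);  // retrieve, 0 if absent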
+
+/// getMDKindNames - Populate the client-supplied SmallVector with the custom
+/// metadata kind names, indexed by kind ID.
+void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
+ Names.resize(pImpl->CustomMDKindNames.size());
+ for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ Names[I->second] = I->first();
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
new file mode 100644
index 000000000000..504b37267f70
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
@@ -0,0 +1,94 @@
+//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the opaque LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
+ : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ X86_MMXTy(C, Type::X86_MMXTyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64) {
+ InlineAsmDiagHandler = 0;
+ InlineAsmDiagContext = 0;
+ NamedStructTypesUniqueID = 0;
+}
+
+namespace {
+struct DropReferences {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.second->dropAllReferences();
+ }
+};
+}
+
+LLVMContextImpl::~LLVMContextImpl() {
+ // NOTE: We need to delete the contents of OwnedModules, but we have to
+ // duplicate it into a temporary vector, because the destructor of Module
+ // will try to remove itself from OwnedModules set. This would cause
+ // iterator invalidation if we iterated on the set directly.
+ std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+ DeleteContainerPointers(Modules);
+
+ std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
+ DropReferences());
+ std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
+ DropReferences());
+ std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
+ DropReferences());
+ std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
+ DropReferences());
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ VectorConstants.freeConstants();
+ AggZeroConstants.freeConstants();
+ NullPtrConstants.freeConstants();
+ UndefValueConstants.freeConstants();
+ InlineAsms.freeConstants();
+ DeleteContainerSeconds(IntConstants);
+ DeleteContainerSeconds(FPConstants);
+
+ // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
+ // and the NonUniquedMDNodes sets, so copy the values out first.
+ SmallVector<MDNode*, 8> MDNodes;
+ MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E; ++I)
+ MDNodes.push_back(&*I);
+ MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+ for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
+ E = MDNodes.end(); I != E; ++I)
+ (*I)->destroy();
+ assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+ "Destroying all MDNodes didn't empty the Context's sets.");
+ // Destroy MDStrings.
+ DeleteContainerSeconds(MDStringCache);
+}
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.h b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
new file mode 100644
index 000000000000..06a6f2a25a38
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
@@ -0,0 +1,238 @@
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContextImpl, the opaque implementation
+// of LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LLVMCONTEXT_IMPL_H
+#define LLVM_LLVMCONTEXT_IMPL_H
+
+#include "llvm/LLVMContext.h"
+#include "ConstantsContext.h"
+#include "LeaksContext.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+
+namespace llvm {
+
+class ConstantInt;
+class ConstantFP;
+class LLVMContext;
+class Type;
+class Value;
+
+struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ const Type* type;
+ KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
+ KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return DenseMapInfo<void*>::getHashValue(Key.type) ^
+ Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ KeyTy(const KeyTy& that) : val(that.val) {}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
+/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
+class DebugRecVH : public CallbackVH {
+ /// Ctx - This is the LLVM Context being referenced.
+ LLVMContextImpl *Ctx;
+
+ /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
+ /// this reference lives in. If this is zero, then it represents a
+ /// non-canonical entry that has no DenseMap value. This can happen due to
+ /// RAUW.
+ int Idx;
+public:
+ DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
+ : CallbackVH(n), Ctx(ctx), Idx(idx) {}
+
+ MDNode *get() const {
+ return cast_or_null<MDNode>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *VNew);
+};
+
+class LLVMContextImpl {
+public:
+ /// OwnedModules - The set of modules instantiated in this context, and which
+ /// will be automatically deleted if this context is deleted.
+ SmallPtrSet<Module*, 4> OwnedModules;
+
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+ void *InlineAsmDiagContext;
+
+ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+ IntMapTy IntConstants;
+
+ typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+ FPMapTy FPConstants;
+
+ StringMap<MDString*> MDStringCache;
+
+ FoldingSet<MDNode> MDNodeSet;
+ // MDNodes may be uniqued or not uniqued. When they're not uniqued, they
+ // aren't in the MDNodeSet, but they're still shared between objects, so no
+ // one object can destroy them. This set allows us to at least destroy them
+ // on Context destruction.
+ SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
+
+ ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ ArrayConstantsTy ArrayConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
+ StructConstantsTy StructConstants;
+
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ VectorType, ConstantVector> VectorConstantsTy;
+ VectorConstantsTy VectorConstants;
+
+ ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
+ NullPtrConstants;
+ ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
+
+ DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
+ ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+ ExprConstants;
+
+ ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+ InlineAsm> InlineAsms;
+
+ ConstantInt *TheTrueVal;
+ ConstantInt *TheFalseVal;
+
+ LeakDetectorImpl<Value> LLVMObjects;
+
+ // Basic type instances.
+ Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+
+ /// TypeAllocator - All dynamically allocated types are allocated from this.
+ /// They live forever until the context is torn down.
+ BumpPtrAllocator TypeAllocator;
+
+ DenseMap<unsigned, IntegerType*> IntegerTypes;
+
+ // TODO: Optimize FunctionTypes/AnonStructTypes!
+ std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
+ std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+ StringMap<StructType*> NamedStructTypes;
+ unsigned NamedStructTypesUniqueID;
+
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+ DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+
+
+ /// ValueHandles - This map keeps track of all of the value handles that are
+ /// watching a Value*. The Value::HasValueHandle bit is used to know
+ /// whether or not a value has an entry in this map.
+ typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ ValueHandlesTy ValueHandles;
+
+ /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
+ StringMap<unsigned> CustomMDKindNames;
+
+ typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy;
+ typedef SmallVector<MDPairTy, 2> MDMapTy;
+
+ /// MetadataStore - Collection of per-instruction metadata used in this
+ /// context.
+ DenseMap<const Instruction *, MDMapTy> MetadataStore;
+
+ /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
+ /// entry with no "inlined at" element.
+ DenseMap<MDNode*, int> ScopeRecordIdx;
+
+ /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
+ /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
+ /// the MDNode is RAUW'd.
+ std::vector<DebugRecVH> ScopeRecords;
+
+ /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for a
+ /// scope/inlined-at pair.
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
+
+ /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
+ /// for an index. The ValueHandle ensures that ScopeInlinedAtIdx stays up
+ /// to date.
+ std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+
+ int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
+ int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+
+ LLVMContextImpl(LLVMContext &C);
+ ~LLVMContextImpl();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/LeakDetector.cpp b/contrib/llvm/lib/VMCore/LeakDetector.cpp
new file mode 100644
index 000000000000..f6651e93e273
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LeakDetector.cpp
@@ -0,0 +1,69 @@
+//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LeakDetector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Value.h"
+using namespace llvm;
+
+static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
+static ManagedStatic<LeakDetectorImpl<void> > Objects;
+
+static void clearGarbage(LLVMContext &Context) {
+ Objects->clear();
+ Context.pImpl->LLVMObjects.clear();
+}
+
+void LeakDetector::addGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->addGarbage(Object);
+}
+
+void LeakDetector::addGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.addGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->removeGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.removeGarbage(Object);
+}
+
+void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
+ const std::string &Message) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+
+ Objects->setName("GENERIC");
+ pImpl->LLVMObjects.setName("LLVM");
+
+ // use non-short-circuit version so that both checks are performed
+ if (Objects->hasGarbage(Message) |
+ pImpl->LLVMObjects.hasGarbage(Message))
+ errs() << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
+
+ // Clear out results so we don't get duplicate warnings on
+ // next call...
+ clearGarbage(Context);
+}
diff --git a/contrib/llvm/lib/VMCore/LeaksContext.h b/contrib/llvm/lib/VMCore/LeaksContext.h
new file mode 100644
index 000000000000..b9e59d46b7ad
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/LeaksContext.h
@@ -0,0 +1,92 @@
+//===- LeaksContext.h - LeakDetector Implementation ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for leak detection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Value.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+template <class T>
+struct PrinterTrait {
+ static void print(const T* P) { errs() << P; }
+};
+
+template<>
+struct PrinterTrait<Value> {
+ static void print(const Value* P) { errs() << *P; }
+};
+
+template <typename T>
+struct LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
+ // Because the most common usage pattern, by far, is to add a garbage
+ // object and then remove it immediately, we optimize this case. When an
+ // object is added, it is not inserted into the set immediately; it is held
+ // in the single-element Cache instead. If it is immediately removed, no
+ // set search needs to be performed.
+ void addGarbage(const T* o) {
+ assert(Ts.count(o) == 0 && "Object already in set!");
+ if (Cache) {
+ assert(Cache != o && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ errs() << '\t';
+ PrinterTrait<T>::print(*I);
+ errs() << '\n';
+ }
+ errs() << '\n';
+
+ return true;
+ }
+
+ return false;
+ }
+
+private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* Name;
+};
+
+}
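+
+// A minimal sketch of the add-then-remove fast path described above
+// (illustrative; V is an assumed Value*):
+//
+//   LeakDetectorImpl<Value> Detector("example");
+//   Detector.addGarbage(V);              // V only lands in the one-slot Cache
+//   Detector.removeGarbage(V);           // cache hit: no set search performed
+//   Detector.hasGarbage("after pass");   // reports and returns true on leaks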
diff --git a/contrib/llvm/lib/VMCore/Metadata.cpp b/contrib/llvm/lib/VMCore/Metadata.cpp
new file mode 100644
index 000000000000..ace4dc2de271
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Metadata.cpp
@@ -0,0 +1,562 @@
+//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Metadata.h"
+#include "LLVMContextImpl.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Instruction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MDString implementation.
+//
+
+MDString::MDString(LLVMContext &C, StringRef S)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {}
+
+MDString *MDString::get(LLVMContext &Context, StringRef Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<MDString *> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str);
+ MDString *&S = Entry.getValue();
+ if (!S) S = new MDString(Context, Entry.getKey());
+ return S;
+}
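+
+// A minimal usage sketch (Ctx assumed): MDStrings are uniqued per context, so
+// repeated lookups of the same text yield the identical object:
+//
+//   MDString *A = MDString::get(Ctx, "deopt");
+//   MDString *B = MDString::get(Ctx, "deopt");
+//   assert(A == B && "uniqued via MDStringCache");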
+
+//===----------------------------------------------------------------------===//
+// MDNodeOperand implementation.
+//
+
+// Use CallbackVH to hold MDNode operands.
+namespace llvm {
+class MDNodeOperand : public CallbackVH {
+ MDNode *Parent;
+public:
+ MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
+ ~MDNodeOperand() {}
+
+ void set(Value *V) {
+ setValPtr(V);
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *NV);
+};
+} // end namespace llvm.
+
+
+void MDNodeOperand::deleted() {
+ Parent->replaceOperand(this, 0);
+}
+
+void MDNodeOperand::allUsesReplacedWith(Value *NV) {
+ Parent->replaceOperand(this, NV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// MDNode implementation.
+//
+
+/// getOperandPtr - Helper function to get the MDNodeOperands co-allocated at
+/// the end of the MDNode.
+static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
+ // Use <= instead of < to permit a one-past-the-end address.
+ assert(Op <= N->getNumOperands() && "Invalid operand number");
+ return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
+}
+
+MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
+: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
+ NumOperands = Vals.size();
+
+ if (isFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
+
+ // Initialize the operand list, which is co-allocated on the end of the node.
+ unsigned i = 0;
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op, ++i)
+ new (Op) MDNodeOperand(Vals[i], this);
+}
+
+
+/// ~MDNode - Destroy MDNode.
+MDNode::~MDNode() {
+ assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
+ "Not being destroyed through destroy()?");
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ if (isNotUniqued()) {
+ pImpl->NonUniquedMDNodes.erase(this);
+ } else {
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+
+ // Destroy the operands.
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ Op->~MDNodeOperand();
+}
+
+static const Function *getFunctionForValue(Value *V) {
+ if (!V) return NULL;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ BasicBlock *BB = I->getParent();
+ return BB ? BB->getParent() : 0;
+ }
+ if (Argument *A = dyn_cast<Argument>(V))
+ return A->getParent();
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent();
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getFunction();
+ return NULL;
+}
+
+#ifndef NDEBUG
+static const Function *assertLocalFunction(const MDNode *N) {
+ if (!N->isFunctionLocal()) return 0;
+
+ // FIXME: This does not handle cyclic function local metadata.
+ const Function *F = 0, *NewF = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ NewF = assertLocalFunction(MD);
+ else
+ NewF = getFunctionForValue(V);
+ }
+ if (F == 0)
+ F = NewF;
+ else
+ assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata");
+ }
+ return F;
+}
+#endif
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null. getFunction() should not be used for performance-
+// critical code because it recursively visits all the MDNode's operands.
+const Function *MDNode::getFunction() const {
+#ifndef NDEBUG
+ return assertLocalFunction(this);
+#endif
+ if (!isFunctionLocal()) return NULL;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (const Function *F = getFunctionForValue(getOperand(i)))
+ return F;
+ return NULL;
+}
+
+// destroy - Delete this node. Only when there are no uses.
+void MDNode::destroy() {
+ setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
+ // Placement delete, then free the memory.
+ this->~MDNode();
+ free(this);
+}
+
+/// isFunctionLocalValue - Return true if this is a value that would require a
+/// function-local MDNode.
+static bool isFunctionLocalValue(Value *V) {
+ return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
+}
+
+MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
+ FunctionLocalness FL, bool Insert) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != Vals.size(); ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = NULL;
+
+ if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ return N;
+
+ bool isFunctionLocal = false;
+ switch (FL) {
+ case FL_Unknown:
+ for (unsigned i = 0; i != Vals.size(); ++i) {
+ Value *V = Vals[i];
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isFunctionLocal = true;
+ break;
+ }
+ }
+ break;
+ case FL_No:
+ isFunctionLocal = false;
+ break;
+ case FL_Yes:
+ isFunctionLocal = true;
+ break;
+ }
+
+ // Co-allocate space for the node and its operands, then placement-new it.
+ void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+ N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+
+ return N;
+}
+
+MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown);
+}
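+
+// A minimal usage sketch (illustrative; Ctx and a Value *Weight are assumed):
+// build a uniqued node from a small array of operands.
+//
+//   Value *Elts[] = { MDString::get(Ctx, "branch_weights"), Weight };
+//   MDNode *N = MDNode::get(Ctx, Elts);   // ArrayRef formed implicitly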
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
+ ArrayRef<Value*> Vals,
+ bool isFunctionLocal) {
+ return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
+MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown, false);
+}
+
+MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ MDNode *N =
+ (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
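+
+// A minimal sketch of the temporary-node protocol (illustrative; Ctx and a
+// final node Real are assumed): forward references are built as temporaries,
+// RAUW'd once the real node exists, and then explicitly deleted.
+//
+//   MDNode *Temp = MDNode::getTemporary(Ctx, ArrayRef<Value*>());
+//   // ... hand Temp out as a placeholder operand ...
+//   Temp->replaceAllUsesWith(Real);
+//   MDNode::deleteTemporary(Temp);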
+
+/// getOperand - Return specified operand.
+Value *MDNode::getOperand(unsigned i) const {
+ return *getOperandPtr(const_cast<MDNode*>(this), i);
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ ID.AddPointer(getOperand(i));
+}
+
+void MDNode::setIsNotUniqued() {
+ setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->NonUniquedMDNodes.insert(this);
+}
+
+// Replace value from this node's operand list.
+void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
+ Value *From = *Op;
+
+ // It is possible that someone did GV->RAUW(inst), replacing a global variable
+ // with an instruction or some other function-local object. If this is a
+ // non-function-local MDNode, it can't point to a function-local object.
+ // Handle this case by implicitly dropping the MDNode reference to null.
+ // Likewise if the MDNode is function-local but for a different function.
+ if (To && isFunctionLocalValue(To)) {
+ if (!isFunctionLocal())
+ To = 0;
+ else {
+ const Function *F = getFunction();
+ const Function *FV = getFunctionForValue(To);
+ // Metadata can be function-local without having an associated function.
+ // So only consider functions to have changed if non-null.
+ if (F && FV && F != FV)
+ To = 0;
+ }
+ }
+
+ if (From == To)
+ return;
+
+ // Update the operand.
+ Op->set(To);
+
+ // If this node is already not being uniqued (because one of the operands
+ // already went to null), then there is nothing else to do here.
+ if (isNotUniqued()) return;
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ // Remove "this" from the context map. FoldingSet doesn't have to reprofile
+ // this node to remove it, so we don't care what state the operands are in.
+ pImpl->MDNodeSet.RemoveNode(this);
+
+ // If we are dropping an argument to null, we choose to not unique the MDNode
+ // anymore. This commonly occurs during destruction, and uniquing these
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
+ if (To == 0) {
+ setIsNotUniqued();
+ return;
+ }
+
+ // Now that the node is out of the folding set, get ready to reinsert it.
+ // First, check to see if another node with the same operands already exists
+ // in the set. If so, then this node is redundant.
+ FoldingSetNodeID ID;
+ Profile(ID);
+ void *InsertPoint;
+ if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+ replaceAllUsesWith(N);
+ destroy();
+ return;
+ }
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NamedMDNode implementation.
+//
+
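+// getNMDOps - Access the opaque Operands pointer as the underlying vector of
+// tracking handles to this named metadata's MDNode operands.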
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
+}
+
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
+}
+
+NamedMDNode::~NamedMDNode() {
+ dropAllReferences();
+ delete &getNMDOps(Operands);
+}
+
+/// getNumOperands - Return number of NamedMDNode operands.
+unsigned NamedMDNode::getNumOperands() const {
+ return (unsigned)getNMDOps(Operands).size();
+}
+
+/// getOperand - Return specified operand.
+MDNode *NamedMDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid Operand number!");
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
+}
+
+/// addOperand - Add metadata Operand.
+void NamedMDNode::addOperand(MDNode *M) {
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
+}
+
+/// eraseFromParent - Drop all references and remove the node from parent
+/// module.
+void NamedMDNode::eraseFromParent() {
+ getParent()->eraseNamedMetadata(this);
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void NamedMDNode::dropAllReferences() {
+ getNMDOps(Operands).clear();
+}
+
+/// getName - Return a constant reference to this named metadata's name.
+StringRef NamedMDNode::getName() const {
+ return StringRef(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Metadata method implementations.
+//
+
+void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+ setMetadata(getContext().getMDKindID(Kind), Node);
+}
+
+MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+ return getMetadataImpl(getContext().getMDKindID(Kind));
+}
+
+/// setMetadata - Set the metadata of the specified kind to the specified
+/// node. This updates/replaces metadata if already present, or removes it if
+/// Node is null.
+void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg) {
+ DbgLoc = DebugLoc::getFromDILocation(Node);
+ return;
+ }
+
+ // Handle the case when we're adding/updating metadata on an instruction.
+ if (Node) {
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() == hasMetadataHashEntry() &&
+ "HasMetadata bit is wonked");
+ if (Info.empty()) {
+ setHasMetadataHashEntry(true);
+ } else {
+ // Handle replacement of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i].second = Node;
+ return;
+ }
+ }
+
+ // No replacement, just add it to the list.
+ Info.push_back(std::make_pair(KindID, Node));
+ return;
+ }
+
+ // Otherwise, we're removing metadata from an instruction.
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "HasMetadata bit out of date!");
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+
+ // Common case is removing the only entry.
+ if (Info.size() == 1 && Info[0].first == KindID) {
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+ return;
+ }
+
+ // Handle removal of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i] = Info.back();
+ Info.pop_back();
+ assert(!Info.empty() && "Removing last entry should be handled above");
+ return;
+ }
+ // Otherwise, removing an entry that doesn't exist on the instruction.
+}
+
+MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode(getContext());
+
+ if (!hasMetadataHashEntry()) return 0;
+
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+
+ for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
+ I != E; ++I)
+ if (I->first == KindID)
+ return I->second;
+ return 0;
+}
+
+void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (!DbgLoc.isUnknown()) {
+ Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
+ DbgLoc.getAsMDNode(getContext())));
+ if (!hasMetadataHashEntry()) return;
+ }
+
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+void Instruction::
+getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+}
+
diff --git a/contrib/llvm/lib/VMCore/Module.cpp b/contrib/llvm/lib/VMCore/Module.cpp
new file mode 100644
index 000000000000..be2fcb8ac6c0
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Module.cpp
@@ -0,0 +1,553 @@
+//===-- Module.cpp - Implement the Module class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Module class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/LeakDetector.h"
+#include "SymbolTableListTraitsImpl.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Methods to implement the globals and functions lists.
+//
+
+GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
+ GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()),
+ false, GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() {
+ GlobalAlias *Ret = new GlobalAlias(Type::getInt32Ty(getGlobalContext()),
+ GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file.
+template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
+template class llvm::SymbolTableListTraits<Function, Module>;
+template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+
+//===----------------------------------------------------------------------===//
+// Primitive Module methods.
+//
+
+Module::Module(StringRef MID, LLVMContext& C)
+ : Context(C), Materializer(NULL), ModuleID(MID) {
+ ValSymTab = new ValueSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
+ Context.addModule(this);
+}
+
+Module::~Module() {
+ Context.removeModule(this);
+ dropAllReferences();
+ GlobalList.clear();
+ FunctionList.clear();
+ AliasList.clear();
+ LibraryList.clear();
+ NamedMDList.clear();
+ delete ValSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
+}
+
+/// Target endian information.
+Module::Endianness Module::getEndianness() const {
+ StringRef temp = DataLayout;
+ Module::Endianness ret = AnyEndianness;
+
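+ // Walk the '-'-separated specifications of the data layout string; an 'e'
+ // token selects little-endian and an 'E' token selects big-endian.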
+ while (!temp.empty()) {
+ StringRef token = DataLayout;
+ tie(token, temp) = getToken(temp, "-");
+
+ if (token[0] == 'e') {
+ ret = LittleEndian;
+ } else if (token[0] == 'E') {
+ ret = BigEndian;
+ }
+ }
+
+ return ret;
+}
+
+/// Target Pointer Size information...
+Module::PointerSize Module::getPointerSize() const {
+ StringRef temp = DataLayout;
+ Module::PointerSize ret = AnyPointerSize;
+
+ while (!temp.empty()) {
+ StringRef token, signalToken;
+ tie(token, temp) = getToken(temp, "-");
+ tie(signalToken, token) = getToken(token, ":");
+
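+ // A pointer specification looks like "p:<size>:...", so take the first
+ // field after the 'p' marker as the pointer size in bits.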
+ if (signalToken[0] == 'p') {
+ int size = 0;
+ getToken(token, ":").first.getAsInteger(10, size);
+ if (size == 32)
+ ret = Pointer32;
+ else if (size == 64)
+ ret = Pointer64;
+ }
+ }
+
+ return ret;
+}
+
+/// getNamedValue - Return the first global value in the module with
+/// the specified name, of arbitrary type. This method returns null
+/// if a global with the specified name is not found.
+GlobalValue *Module::getNamedValue(StringRef Name) const {
+ return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
+}
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+/// This ID is uniqued across modules in the current LLVMContext.
+unsigned Module::getMDKindID(StringRef Name) const {
+ return Context.getMDKindID(Name);
+}
+
+/// getMDKindNames - Populate the client-supplied SmallVector with the names of
+/// the custom metadata IDs registered in this LLVMContext. ID #0 is not used,
+/// so it is filled in as an empty string.
+void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
+ return Context.getMDKindNames(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the functions in the module.
+//
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return
+// it. This is nice because it allows most passes to get away with not handling
+// the symbol table directly for this common task.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Okay, the function exists. Does it have externally visible linkage?
+ if (F->hasLocalLinkage()) {
+ // Clear the function's name.
+ F->setName("");
+ // Retry, now there won't be a conflict.
+ Constant *NewF = getOrInsertFunction(Name, Ty);
+ F->setName(Name);
+ return NewF;
+ }
+
+ // If the function exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (F->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const FunctionType *Ty) {
+ AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
+ return getOrInsertFunction(Name, Ty, AttributeList);
+}
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return it.
+// This version of the method takes a null terminated list of function
+// arguments, which makes it easier for clients to use.
+//
+Constant *Module::getOrInsertFunction(StringRef Name,
+ AttrListPtr AttributeList,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttrListPtr::get((AttributeWithIndex *)0, 0));
+}
+
+// getFunction - Look up the specified function in the module symbol table.
+// If it does not exist, return null.
+//
+Function *Module::getFunction(StringRef Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+/// getGlobalVariable - Look up the specified global variable in the module
+/// symbol table. If it does not exist, return null. If AllowLocal is set to
+/// true, this function will also return globals that have local linkage;
+/// by default, such globals are not returned.
+///
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
+ bool AllowLocal) const {
+ if (GlobalVariable *Result =
+ dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
+ if (AllowLocal || !Result->hasLocalLinkage())
+ return Result;
+ return 0;
+}
+
+/// getOrInsertGlobal - Look up the specified global in the module symbol table.
+/// 1. If it does not exist, add a declaration of the global and return it.
+/// 2. Else, the global exists but has the wrong type: return the global
+/// with a constantexpr cast to the right type.
+/// 3. Finally, if the existing global is the correct declaration, return the
+/// existing global.
+Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) {
+ // See if we have a definition for the specified global already.
+ GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
+ if (GV == 0) {
+ // Nope, add it
+ GlobalVariable *New =
+ new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ 0, Name);
+ return New; // Return the new declaration.
+ }
+
+ // If the variable exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (GV->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing global or a declaration.
+ return GV;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the aliases and named metadata in the module.
+//
+
+// getNamedAlias - Look up the specified alias in the module symbol table.
+// If it does not exist, return null.
+//
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
+ return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
+}
+
+/// getNamedMetadata - Return the first NamedMDNode in the module with the
+/// specified name. This method returns null if a NamedMDNode with the
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+ SmallString<256> NameData;
+ StringRef NameRef = Name.toStringRef(NameData);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
+}
+
+/// getOrInsertNamedMetadata - Return the first named MDNode in the module
+/// with the specified name. This method returns a new NamedMDNode if a
+/// NamedMDNode with the specified name is not found.
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
+ return NMD;
+}
+
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to control the materialization of GlobalValues in the Module.
+//
+void Module::setMaterializer(GVMaterializer *GVM) {
+ assert(!Materializer &&
+ "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+ " to clear it out before setting another one.");
+ Materializer.reset(GVM);
+}
+
+bool Module::isMaterializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isMaterializable(GV);
+ return false;
+}
+
+bool Module::isDematerializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isDematerializable(GV);
+ return false;
+}
+
+bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ if (Materializer)
+ return Materializer->Materialize(GV, ErrInfo);
+ return false;
+}
+
+void Module::Dematerialize(GlobalValue *GV) {
+ if (Materializer)
+ return Materializer->Dematerialize(GV);
+}
+
+bool Module::MaterializeAll(std::string *ErrInfo) {
+ if (!Materializer)
+ return false;
+ return Materializer->MaterializeModule(this, ErrInfo);
+}
+
+bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
+ if (MaterializeAll(ErrInfo))
+ return true;
+ Materializer.reset();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other module related stuff.
+//
+
+
+// dropAllReferences() - This function causes all the subelements to "let go"
+// of all references that they are maintaining. This allows one to 'delete' a
+// whole module at a time, even though there may be circular references... first
+// all references are dropped, and all use counts go to zero. Then everything
+// is deleted for real. Note that no operations are valid on an object that
+// has "dropped all references", except operator delete.
+//
+void Module::dropAllReferences() {
+ for(Module::iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+void Module::addLibrary(StringRef Lib) {
+ for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
+ if (*I == Lib)
+ return;
+ LibraryList.push_back(Lib);
+}
+
+void Module::removeLibrary(StringRef Lib) {
+ LibraryListType::iterator I = LibraryList.begin();
+ LibraryListType::iterator E = LibraryList.end();
+ for (;I != E; ++I)
+ if (*I == Lib) {
+ LibraryList.erase(I);
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Type finding functionality.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// TypeFinder - Walk over a module, identifying all of the types that are
+ /// used by the module.
+ class TypeFinder {
+ // To avoid walking constant expressions multiple times and other IR
+ // objects, we keep several helper maps.
+ DenseSet<const Value*> VisitedConstants;
+ DenseSet<const Type*> VisitedTypes;
+
+ std::vector<StructType*> &StructTypes;
+ public:
+ TypeFinder(std::vector<StructType*> &structTypes)
+ : StructTypes(structTypes) {}
+
+ void run(const Module &M) {
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Get types from functions.
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+ // Incorporate the type of the instruction and all its operands.
+ incorporateType(I.getType());
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+ }
+
+ private:
+ void incorporateType(Type *Ty) {
+ // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure type, record it.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+ }
+
+ /// incorporateValue - This method is used to walk operand lists finding
+ /// types hiding in constant expressions and other operands that won't be
+ /// walked in other ways. GlobalValues, basic blocks, instructions, and
+ /// inst operands are all explicitly enumerated.
+ void incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+ }
+
+ void incorporateMDNode(const MDNode *V) {
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+ }
+ };
+} // end anonymous namespace
+
+void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const {
+ TypeFinder(StructTypes).run(*this);
+}
+
+
diff --git a/contrib/llvm/lib/VMCore/Pass.cpp b/contrib/llvm/lib/VMCore/Pass.cpp
new file mode 100644
index 000000000000..9afc54063321
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Pass.cpp
@@ -0,0 +1,293 @@
+//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass infrastructure. It is primarily
+// responsible for ensuring that passes are executed and batched together
+// optimally.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Pass Implementation
+//
+
+Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
+
+// Force out-of-line virtual method.
+Pass::~Pass() {
+ delete Resolver;
+}
+
+// Force out-of-line virtual method.
+ModulePass::~ModulePass() { }
+
+Pass *ModulePass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+}
+
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+ return PMT_ModulePassManager;
+}
+
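+// mustPreserveAnalysisID - Return true if the analysis identified by AID is
+// still available through this pass's resolver.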
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
+}
+
+// dumpPassStructure - Implement the -debug-pass=Structure option
+void Pass::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << getPassName() << "\n";
+}
+
+/// getPassName - Return a nice clean name for a pass. This is usually
+/// implemented in terms of the name that is registered by one of the
+/// Registration templates, but can be overridden directly.
+///
+const char *Pass::getPassName() const {
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
+ return PI->getPassName();
+ return "Unnamed pass: implement Pass::getPassName()";
+}
+
+void Pass::preparePassManager(PMStack &) {
+ // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+ // Default implementation.
+ return PMT_Unknown;
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+ // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+ // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+ // By default, don't do anything.
+}
+
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
+// print - Print out the internal state of the pass. This is called by Analyze
+// to print out the contents of an analysis. Otherwise it is not necessary to
+// implement this method.
+//
+void Pass::print(raw_ostream &O,const Module*) const {
+ O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
+}
+
+// dump - call print(cerr);
+void Pass::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// ImmutablePass Implementation
+//
+// Force out-of-line virtual method.
+ImmutablePass::~ImmutablePass() { }
+
+void ImmutablePass::initializePass() {
+ // By default, don't do anything.
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPass Implementation
+//
+
+Pass *FunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+}
+
+bool FunctionPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool FunctionPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+ return PMT_FunctionPassManager;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicBlockPass Implementation
+//
+
+Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+
+ llvm_unreachable("BasicBlockPass printing unsupported.");
+ return 0;
+}
+
+bool BasicBlockPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+}
+
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
+}
+
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
+}
+
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
+//===----------------------------------------------------------------------===//
+// Analysis Group Implementation Code
+//===----------------------------------------------------------------------===//
+
+// RegisterAGBase implementation
+//
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
+}
+
+//===----------------------------------------------------------------------===//
+// PassRegistrationListener implementation
+//
+
+// PassRegistrationListener ctor - Add the current object to the list of
+// PassRegistrationListeners...
+PassRegistrationListener::PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
+}
+
+// dtor - Remove object from list of listeners...
+PassRegistrationListener::~PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
+}
+
+// enumeratePasses - Iterate over the registered passes, calling the
+// passEnumerate callback on each PassInfo object.
+//
+void PassRegistrationListener::enumeratePasses() {
+ PassRegistry::getPassRegistry()->enumerateWith(this);
+}
+
+PassNameParser::~PassNameParser() {}
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage Class Implementation
+//
+
+namespace {
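+ // GetCFGOnlyPasses - Registration listener that collects the IDs of all
+ // registered passes that only depend on the CFG.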
+ struct GetCFGOnlyPasses : public PassRegistrationListener {
+ typedef AnalysisUsage::VectorType VectorType;
+ VectorType &CFGOnlyList;
+ GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
+
+ void passEnumerate(const PassInfo *P) {
+ if (P->isCFGOnlyPass())
+ CFGOnlyList.push_back(P->getTypeInfo());
+ }
+ };
+}
+
+// setPreservesCFG - This function should be called by a pass if and only if
+// it does not:
+//
+// 1. Add or remove basic blocks from the function
+// 2. Modify terminator instructions in any way.
+//
+// This function annotates the AnalysisUsage info object to say that analyses
+// that only depend on the CFG are preserved by this pass.
+//
+void AnalysisUsage::setPreservesCFG() {
+ // Since this transformation doesn't modify the CFG, it preserves all analyses
+ // that only depend on the CFG (like dominators, loop info, etc...)
+ GetCFGOnlyPasses(Preserved).enumeratePasses();
+}
+
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
+ Required.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
+ return *this;
+}
diff --git a/contrib/llvm/lib/VMCore/PassManager.cpp b/contrib/llvm/lib/VMCore/PassManager.cpp
new file mode 100644
index 000000000000..5cf2905733ac
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PassManager.cpp
@@ -0,0 +1,1882 @@
+//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/PassManagers.h"
+#include "llvm/PassManager.h"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringMap.h"
+#include <algorithm>
+#include <cstdio>
+#include <map>
+using namespace llvm;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
+
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ None, Arguments, Structure, Executions, Details
+};
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+
+typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
+PassOptionList;
+
+// Print IR out before/after specified passes.
+static PassOptionList
+PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::Hidden);
+
+static PassOptionList
+PrintAfter("print-after",
+ llvm::cl::desc("Print IR after specified passes"),
+ cl::Hidden);
+
+static cl::opt<bool>
+PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false));
+static cl::opt<bool>
+PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false));
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+
+static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
+ PassOptionList &PassesToPrint) {
+ if (const llvm::PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// This is a utility to check whether a pass should have IR dumped
+/// before it.
+static bool ShouldPrintBeforePass(const void *PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// after it.
+static bool ShouldPrintAfterPass(const void *PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
+}
+
+} // End of llvm namespace
+
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+
+
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPasses. It batches all the
+/// passes together and sequences them to process one basic block before
+/// processing the next basic block.
+class BBPassManager : public PMDataManager, public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager(int Depth)
+ : PMDataManager(Depth), FunctionPass(ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+}
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+private:
+ bool wasRun;
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a function printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+ }
+
+ // Prepare for running an on the fly pass, freeing memory if needed
+ // from a previous run.
+ void releaseMemoryOnTheFly();
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ void addTopLevelPass(Pass *P) {
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack, PMT_FunctionPassManager);
+ }
+
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+char FunctionPassManagerImpl::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth) { }
+
+ // Delete on the fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+ /// Return the function pass corresponding to PassInfo PI that is required
+ /// by module pass MP. Instantiate the analysis pass by using its
+ /// runOnFunction() for function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+ OnTheFlyManagers.find(MP);
+ if (I != OnTheFlyManagers.end())
+ I->second->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+ /// Collection of on the fly FPPassManagers. These managers manage
+ /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+
+public:
+ static char ID;
+ explicit PassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new MPPassManager(1)) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ void addTopLevelPass(Pass *P) {
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack, PMT_ModulePassManager);
+ }
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+char PassManagerImpl::ID = 0;
+} // End of llvm namespace
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+ if (TheDebugProbe) return;
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
+ // before static globals, thus it will be destroyed before them.
+ static ManagedStatic<DebugInfoProbeInfo> DIP;
+ TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
+class TimingInfo {
+ DenseMap<Pass*, Timer*> TimingData;
+ TimerGroup TG;
+public:
+ // Use 'create' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+ // ~TimingInfo - Print out the collected timing information.
+ ~TimingInfo() {
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+ // to a non null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
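+ // Pass managers are containers for other passes; they do not get a timer.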
+ if (P->getAsPMDataManager())
+ return 0;
+
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
+ return T;
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void
+PMTopLevelManager::setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses,
+ Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+ // Update the last users of passes that are required transitively by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+ // DenseMap iterator is not invalidated here because
+ // this is just updating existing entries.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
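+/// Return the cached AnalysisUsage for pass P, computing and caching it on
+/// the first request.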
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+ // TODO: Allocate a function manager for this pass, otherwise the required
+ // set may be inserted into the previous function manager.
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ assert(PI && "Expected required passes to be initialized");
+ AnalysisPass = PI->createPass();
+ if (P->getPotentialPassManagerType () ==
+ AnalysisPass->getPotentialPassManagerType())
+ // Schedule analysis pass that is managed by the same pass manager.
+ schedulePass(AnalysisPass);
+ else if (P->getPotentialPassManagerType () >
+ AnalysisPass->getPotentialPassManagerType()) {
+ // Schedule analysis pass that is managed by a new manager.
+ schedulePass(AnalysisPass);
+ // Recheck analysis passes to ensure that required analyses that
+ // are already checked are still available.
+ checkAnalysis = true;
+ }
+ else
+ // Do not schedule this analysis. Lower level analysis
+ // passes are run on the fly.
+ delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ addTopLevelPass(P);
+}
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ // Check pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check other pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ AnalysisID PI = (*I)->getPassID();
+ if (PI == AID)
+ return *I;
+
+ // If the pass is not found, check the interfaces implemented by the immutable pass.
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
+ }
+ }
+
+ return 0;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to getAsPass to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->getAsPass()->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ dbgs() << "Pass Arguments: ";
+ for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+ assert(PI && "Expected all immutable passes to be initialized");
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ dbgs() << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ // Initialize the indirect pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVectorImpl<ImmutablePass *>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augment AvailableAnalysis by adding the analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ AnalysisID PI = P->getPassID();
+
+ AvailableAnalysis[PI] = P;
+
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (P1->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassID()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
+ TimeRegion PassTimer(getPassTimer(AP));
+ AP->verifyAnalysis();
+ }
+ }
+}
+
+/// Remove Analysis not preserved by Pass P
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (std::map<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ std::map<AnalysisID, Pass*>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Check inherited analysis also. If P is not preserving analysis
+ // provided by parent manager then remove it here.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (std::map<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ std::map<AnalysisID, Pass *>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+ }
+}
+
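+// Illustrative sketch (not part of this file): the preserved set consulted by
+// the two routines above comes from the pass's own getAnalysisUsage().  A
+// transform that keeps the dominator tree valid would declare (DominatorTree
+// is the standard analysis from llvm/Analysis/Dominators.h):
+//
+//   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addPreserved<DominatorTree>();  // kept in AvailableAnalysis
+//     // AU.setPreservesAll();           // alternative: preserve everything
+//   }
+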
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+ // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
+ }
+
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
+
+void PMDataManager::freePass(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
+ P->releaseMemory();
+ }
+
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ std::map<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]->getTypeInfo());
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+ // If a FunctionPass F is the last user of ModulePass info M
+ // then F's manager, not F, records itself as the last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+ // Let the parent claim responsibility of last use
+ TransferLastUses.push_back(PRequired);
+ // Keep track of higher level analysis used by this manager.
+ HigherLevelAnalysis.push_back(PRequired);
+ } else
+ llvm_unreachable("Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (P->getAsPMDataManager() == 0)
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = getAsPass();
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+ // Now, take care of required analyses that are not available.
+ for (SmallVectorImpl<AnalysisID>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with the analysis passes that are required by
+/// pass P and are available. Populate RP_NotAvail with the analysis
+/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+ // This may be an analysis pass that is initialized on the fly.
+ // If that is not the case then it will raise an assert when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If desired pass is not found
+/// then return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+ // Check if the AvailableAnalysis map has an entry for this analysis.
+ std::map<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search Parents through TopLevelManager
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+
+ SmallVector<Pass *, 12> LUses;
+
+ // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ StringRef Msg) {
+ if (PassDebugging < Executions)
+ return;
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ dbgs() << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ dbgs() << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ dbgs() << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ dbgs() << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ dbgs() << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ dbgs() << "' on Loop '" << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) dbgs() << ',';
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ if (!PInf) {
+ // Some preserved passes, such as AliasAnalysis, may not be initialized by
+ // all drivers.
+ dbgs() << " Uninitialized Pass";
+ continue;
+ }
+ dbgs() << ' ' << PInf->getPassName();
+ }
+ dbgs() << '\n';
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+ // A module level pass may require function level analysis info
+ // (e.g. dominator info). The pass manager uses an on-the-fly function pass
+ // manager to provide this on demand. In that case, in pass manager
+ // terminology, the module level pass is requiring lower level analysis info
+ // managed by a lower level pass manager.
+
+ // When the pass manager is not able to order the required analysis info, it
+ // checks whether any lower level manager will be able to provide this
+ // analysis info on demand.
+#ifndef NDEBUG
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+ llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+ assert(0 && "Unable to find on the fly pass");
+ return NULL;
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+ TimeRegion PassTimer(getPassTimer(BP));
+
+ LocalChanged |= BP->runOnBasicBlock(*I);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getName());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ }
+
+ return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+ FPM = new FunctionPassManagerImpl(0);
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void FunctionPassManager::addImpl(Pass *P) {
+ FPM->add(P);
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager_X is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ // If this is not a function pass, don't add a printer for it.
+ const void *PassID = P->getPassID();
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintBeforePass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
+
+ addImpl(P);
+
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintAfterPass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*M);
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*M);
+}
+
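+// Illustrative sketch (not part of this file): typical use of
+// FunctionPassManager by a driver that processes functions one at a time.
+// `M` is assumed to be a valid Module* owned by the caller; the banner string
+// is arbitrary.  createPrintFunctionPass() is defined later in this change.
+//
+//   FunctionPassManager FPM(M);
+//   FPM.add(createPrintFunctionPass("*** example banner ***", &dbgs(), false));
+//   FPM.doInitialization();
+//   for (Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
+//     FPM.run(*F);
+//   FPM.doFinalization();
+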
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ dumpArguments();
+ dumpPasses();
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+ if (!wasRun)
+ return;
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+ FPPassManager *FPPM = getContainedManager(Index);
+ for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+ FPPM->getContainedPass(Index)->releaseMemory();
+ }
+ }
+ wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+ createDebugInfoProbe();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ wasRun = true;
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+ if (TheDebugProbe)
+ TheDebugProbe->initialize(FP, F);
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
+
+ LocalChanged |= FP->runOnFunction(F);
+ }
+ if (TheDebugProbe)
+ TheDebugProbe->finalize(FP, F);
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = doInitialization(M);
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ runOnFunction(*I);
+
+ return doFinalization(M) || Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Initialize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ Changed |= FPP->doInitialization(M);
+ }
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ TimeRegion PassTimer(getPassTimer(MP));
+
+ LocalChanged |= MP->runOnModule(M);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+ }
+
+ // Finalize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ // We don't know when an on-the-fly pass will be run for the last time,
+ // so we need to releaseMemory / finalize here.
+ FPP->releaseMemoryOnTheFly();
+ Changed |= FPP->doFinalization(M);
+ }
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl(0);
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+}
+
+/// Return the function pass corresponding to PassInfo PI that is
+/// required by module pass MP. Instantiate the analysis pass and run
+/// it on function F on demand.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
+ FPP->releaseMemoryOnTheFly();
+ FPP->run(F);
+ return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
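+// Illustrative sketch (not part of this file): the on-the-fly machinery above
+// is what serves a module pass that requires a function level analysis.  The
+// pass name below is a placeholder; DominatorTree is the standard function
+// level analysis from llvm/Analysis/Dominators.h:
+//
+//   struct ExampleModulePass : public ModulePass {
+//     static char ID;                          // define as 0 at namespace scope
+//     ExampleModulePass() : ModulePass(ID) {}
+//     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//       AU.addRequired<DominatorTree>();       // lower level analysis
+//     }
+//     virtual bool runOnModule(Module &M) {
+//       for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+//         if (!F->isDeclaration())
+//           getAnalysis<DominatorTree>(*F);    // served via getOnTheFlyPass()
+//       return false;
+//     }
+//   };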
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+//
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+ createDebugInfoProbe();
+
+ dumpArguments();
+ dumpPasses();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl(0);
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void PassManager::addImpl(Pass *P) {
+ PM->add(P);
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ const void* PassID = P->getPassID();
+ if (ShouldPrintBeforePass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
+
+ addImpl(P);
+
+ if (ShouldPrintAfterPass(PassID))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
+
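+// Illustrative sketch (not part of this file): whole-module use of
+// PassManager.  `M` is assumed to be a valid Module& owned by the caller;
+// createPrintModulePass() is defined later in this change.
+//
+//   PassManager Passes;
+//   Passes.add(createPrintModulePass(&dbgs(), false, "*** example banner ***"));
+//   bool Modified = Passes.run(M);
+//   (void)Modified;
+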
+//===----------------------------------------------------------------------===//
+// TimingInfo Class - This class is used to calculate information about the
+// amount of time each pass takes to execute. This only happens when
+// -time-passes is enabled on the command line.
+//
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// If TimingInfo is enabled then return the timer for pass P, otherwise null.
+Timer *llvm::getPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ return TheTimeInfo->getPassTimer(P);
+ return 0;
+}
+
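+// Illustrative note (not part of this file): timing can be enabled either with
+// the -time-passes flag defined above, or programmatically before a pass
+// manager is run:
+//
+//   llvm::TimePassesIsEnabled = true;   // same effect as -time-passes
+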
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+
+ if (!this->empty()) {
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ }
+
+ S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ printf("%s ", (*I)->getAsPass()->getPassName());
+
+ if (!S.empty())
+ printf("\n");
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while (!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+ // Find Function Pass Manager
+ while (!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+
+ // Create new Function Pass Manager if needed.
+ FPPassManager *FPP;
+ if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+ FPP = (FPPassManager *)PMS.top();
+ } else {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager(PMD->getDepth() + 1);
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find an appropriate Basic Block Pass Manager in the PM Stack
+/// and add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP;
+
+ // A Basic Block Pass Manager is a leaf pass manager. It does not contain
+ // any other pass manager.
+ if (!PMS.empty() &&
+ PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+ BBP = (BBPassManager *)PMS.top();
+ } else {
+ // If the leaf manager is not a Basic Block Pass Manager then create a
+ // new Basic Block Pass Manager.
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS, PreferredType);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/contrib/llvm/lib/VMCore/PassRegistry.cpp b/contrib/llvm/lib/VMCore/PassRegistry.cpp
new file mode 100644
index 000000000000..fa92620b288e
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PassRegistry.cpp
@@ -0,0 +1,208 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
+
+using namespace llvm;
+
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+ return &*PassRegistryObj;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef DenseMap<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ SmallPtrSet<const PassInfo *, 8> Implementations;
+ };
+ DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<const PassInfo*> ToFree;
+ std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+ if (!pImpl)
+ pImpl = new PassRegistryImpl();
+ return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+
+ for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+ E = Impl->ToFree.end(); I != E; ++I)
+ delete *I;
+
+ delete Impl;
+ pImpl = 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+ return I != Impl->PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::StringMapType::const_iterator
+ I = Impl->PassInfoStringMap.find(Arg);
+ return I != Impl->PassInfoStringMap.end() ? I->second : 0;
+}
+
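+// Illustrative sketch (not part of this file): both lookup forms above are
+// commonly used by drivers, e.g. to resolve a pass named on the command line.
+// "print-module" is the argument string registered later in this change:
+//
+//   const PassInfo *PI =
+//       PassRegistry::getPassRegistry()->getPassInfo(StringRef("print-module"));
+//   if (PI) {
+//     Pass *P = PI->createPass();   // caller owns P until it is added to a PM
+//     delete P;
+//   }
+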
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ bool Inserted =
+ Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!");
+ (void)Inserted;
+ Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+
+ if (ShouldFree) Impl->ToFree.push_back(&PI);
+}
+
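+// Illustrative note (not part of this file): passes normally reach
+// registerPass() through the INITIALIZE_PASS macro from llvm/PassSupport.h,
+// exactly as PrintModulePass does later in this change (the pass name and
+// argument below are placeholders):
+//
+//   INITIALIZE_PASS(ExamplePass, "example-pass",
+//                   "An example pass", false, false)
+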
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::iterator I =
+ Impl->PassInfoMap.find(PI.getTypeInfo());
+ assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ Impl->PassInfoMap.erase(I);
+ Impl->PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+ E = Impl->PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault,
+ bool ShouldFree) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::AnalysisGroupInfo &AGI =
+ Impl->AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ if (ShouldFree) Impl->ToFree.push_back(&Registeree);
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ Impl->Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // NOTE: This is necessary, because removeRegistrationListener() can be called
+ // as part of the llvm_shutdown sequence. Since we have no control over the
+ // order of that sequence, we need to gracefully handle the case where the
+ // PassRegistry is destructed before the object that triggers this call.
+ if (!pImpl) return;
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+ assert(I != Impl->Listeners.end() &&
+ "PassRegistrationListener not registered!");
+ Impl->Listeners.erase(I);
+}
diff --git a/contrib/llvm/lib/VMCore/PrintModulePass.cpp b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
new file mode 100644
index 000000000000..1f1fbc91bc31
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
@@ -0,0 +1,101 @@
+//===--- VMCore/PrintModulePass.cpp - Module/Function Printer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PrintModulePass and PrintFunctionPass implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/PrintModulePass.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class PrintModulePass : public ModulePass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintModulePass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnModule(Module &M) {
+ (*Out) << Banner << M;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintFunctionPass : public FunctionPass {
+ std::string Banner; // String to print before each function
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintFunctionPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ // runOnFunction - This pass just prints a banner followed by the
+ // function as it's processed.
+ //
+ bool runOnFunction(Function &F) {
+ (*Out) << Banner << static_cast<Value&>(F);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char PrintModulePass::ID = 0;
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false)
+char PrintFunctionPass::ID = 0;
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false)
+
+/// createPrintModulePass - Create and return a pass that writes the
+/// module to the specified raw_ostream.
+ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintModulePass(Banner, OS, DeleteStream);
+}
+
+/// createPrintFunctionPass - Create and return a pass that prints
+/// functions to the specified raw_ostream as they are processed.
+FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
+ llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintFunctionPass(Banner, OS, DeleteStream);
+}
+
diff --git a/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h
new file mode 100644
index 000000000000..72687bb5e0b2
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/SymbolTableListTraitsImpl.h
@@ -0,0 +1,118 @@
+//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stickier parts of the SymbolTableListTraits class,
+// and is explicitly instantiated where needed to avoid defining all this code
+// in a widely used header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+
+#include "llvm/SymbolTableListTraits.h"
+#include "llvm/ValueSymbolTable.h"
+
+namespace llvm {
+
+/// setSymTabObject - This is called when (e.g.) the parent of a basic block
+/// changes. This requires us to remove all the instruction symtab entries from
+/// the current function and reinsert them into the new function.
+template<typename ValueSubClass, typename ItemParentClass>
+template<typename TPtr>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::setSymTabObject(TPtr *Dest, TPtr Src) {
+ // Get the old symtab and value list before doing the assignment.
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+
+ // Do it.
+ *Dest = Src;
+
+ // Get the new SymTab object.
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+
+ // If there is nothing to do, quick exit.
+ if (OldST == NewST) return;
+
+ // Move all the elements from the old symtab to the new one.
+ iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ if (ItemList.empty()) return;
+
+ if (OldST) {
+ // Remove all entries from the previous symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ OldST->removeValueName(I->getValueName());
+ }
+
+ if (NewST) {
+ // Add all of the items to the new symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ NewST->reinsertValue(I);
+ }
+
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::addNodeToList(ValueSubClass *V) {
+ assert(V->getParent() == 0 && "Value already in a container!!");
+ ItemParentClass *Owner = getListOwner();
+ V->setParent(Owner);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ ST->reinsertValue(V);
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::removeNodeFromList(ValueSubClass *V) {
+ V->setParent(0);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ ST->removeValueName(V->getValueName());
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
+ // We only have to do work here if transferring instructions between BBs
+ ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
+ if (NewIP == OldIP) return; // No work to do at all...
+
+ // We only have to update symbol table entries if we are transferring the
+ // instructions to a different symtab object...
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ if (NewST != OldST) {
+ for (; first != last; ++first) {
+ ValueSubClass &V = *first;
+ bool HasName = V.hasName();
+ if (OldST && HasName)
+ OldST->removeValueName(V.getValueName());
+ V.setParent(NewIP);
+ if (NewST && HasName)
+ NewST->reinsertValue(&V);
+ }
+ } else {
+ // Just transferring between blocks in the same function, simply update the
+ // parent fields in the instructions...
+ for (; first != last; ++first)
+ first->setParent(NewIP);
+ }
+}
+
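+// Illustrative sketch (not part of this header): transferNodesFromList() above
+// is what fires when instructions are spliced between two basic blocks, e.g.
+// (From and To are placeholder BasicBlock*):
+//
+//   To->getInstList().splice(To->end(), From->getInstList(),
+//                            From->begin(), From->end());
+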
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/VMCore/Type.cpp b/contrib/llvm/lib/VMCore/Type.cpp
new file mode 100644
index 000000000000..f874d1b28302
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Type.cpp
@@ -0,0 +1,687 @@
+//===-- Type.cpp - Implement the Type class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Type class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Module.h"
+#include <algorithm>
+#include <cstdarg>
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Type Class Implementation
+//===----------------------------------------------------------------------===//
+
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+ switch (IDNumber) {
+ case VoidTyID : return getVoidTy(C);
+ case FloatTyID : return getFloatTy(C);
+ case DoubleTyID : return getDoubleTy(C);
+ case X86_FP80TyID : return getX86_FP80Ty(C);
+ case FP128TyID : return getFP128Ty(C);
+ case PPC_FP128TyID : return getPPC_FP128Ty(C);
+ case LabelTyID : return getLabelTy(C);
+ case MetadataTyID : return getMetadataTy(C);
+ case X86_MMXTyID : return getX86_MMXTy(C);
+ default:
+ return 0;
+ }
+}
+
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+/// isIntegerTy - Return true if this is an IntegerType of the specified width.
+bool Type::isIntegerTy(unsigned Bitwidth) const {
+ return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+}
+
+/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
+/// integer types.
+///
+bool Type::isIntOrIntVectorTy() const {
+ if (isIntegerTy())
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isIntegerTy();
+}
+
+/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
+///
+bool Type::isFPOrFPVectorTy() const {
+ if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
+ ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
+ ID == Type::PPC_FP128TyID)
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
+}
+
+// canLosslesslyBitCastTo - Return true if this type can be converted to
+// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
+//
+bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
+ // Identity cast means no change so return true
+ if (this == Ty)
+ return true;
+
+ // They are not convertible unless they are at least first class types
+ if (!this->isFirstClassType() || !Ty->isFirstClassType())
+ return false;
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not. Also, 64-bit vector types can be
+ // converted to x86mmx.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+ if (Ty->getTypeID() == Type::X86_MMXTyID &&
+ thisPTy->getBitWidth() == 64)
+ return true;
+ }
+
+ if (this->getTypeID() == Type::X86_MMXTyID)
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (thatPTy->getBitWidth() == 64)
+ return true;
+
+ // At this point we have only various mismatches of the first class types
+ // remaining and ptr->ptr. Just select the lossless conversions. Everything
+ // else is not lossless.
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
+ return false; // Other types have no identity values
+}
+
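+// Illustrative note (not part of this file): for example, with an LLVMContext
+// C, a <2 x i32> vector can be losslessly bitcast to a <4 x i16> vector since
+// both are 64 bits wide (needs llvm/DerivedTypes.h):
+//
+//   Type *V2i32 = VectorType::get(Type::getInt32Ty(C), 2);
+//   Type *V4i16 = VectorType::get(Type::getInt16Ty(C), 4);
+//   bool OK = V2i32->canLosslesslyBitCastTo(V4i16);   // true
+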
+bool Type::isEmptyTy() const {
+ const ArrayType *ATy = dyn_cast<ArrayType>(this);
+ if (ATy) {
+ unsigned NumElements = ATy->getNumElements();
+ return NumElements == 0 || ATy->getElementType()->isEmptyTy();
+ }
+
+ const StructType *STy = dyn_cast<StructType>(this);
+ if (STy) {
+ unsigned NumElements = STy->getNumElements();
+ for (unsigned i = 0; i < NumElements; ++i)
+ if (!STy->getElementType(i)->isEmptyTy())
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+unsigned Type::getPrimitiveSizeInBits() const {
+ switch (getTypeID()) {
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::X86_FP80TyID: return 80;
+ case Type::FP128TyID: return 128;
+ case Type::PPC_FP128TyID: return 128;
+ case Type::X86_MMXTyID: return 64;
+ case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
+ case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
+ default: return 0;
+ }
+}
+
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() const {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPointTy() && "Not a floating point type!");
+ if (ID == FloatTyID) return 24;
+ if (ID == DoubleTyID) return 53;
+ if (ID == X86_FP80TyID) return 64;
+ if (ID == FP128TyID) return 113;
+ assert(ID == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
+/// isSizedDerivedType - Derived types like structures and arrays are sized
+/// iff all of the members of the type are sized as well. Since asking for
+/// their size is relatively uncommon, move this operation out of line.
+bool Type::isSizedDerivedType() const {
+ if (this->isIntegerTy())
+ return true;
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isSized();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->isSized();
+
+ if (!this->isStructTy())
+ return false;
+
+ // Opaque structs have no size.
+ if (cast<StructType>(this)->isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are.
+ for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ return true;
+}
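+
+// Illustrative sketch of the sizedness rules above (assumes an LLVMContext
+// 'Ctx' is in scope):
+//
+//   StructType *S = StructType::createNamed(Ctx, "box");
+//   S->isSized();                              // false: opaque, no body yet
+//   Type *Fields[] = { Type::getInt32Ty(Ctx), Type::getDoubleTy(Ctx) };
+//   S->setBody(Fields, /*isPacked=*/false);
+//   S->isSized();                              // true: every field is sized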
+
+//===----------------------------------------------------------------------===//
+// Primitive 'Type' data
+//===----------------------------------------------------------------------===//
+
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+ return IntegerType::get(C, N);
+}
+
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+ return getFloatTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+ return getDoubleTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_FP80Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getFP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getPPC_FP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+ return getIntNTy(C, N)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt1Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt8Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt16Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt32Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt64Ty(C)->getPointerTo(AS);
+}
+
+
+//===----------------------------------------------------------------------===//
+// IntegerType Implementation
+//===----------------------------------------------------------------------===//
+
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
+ }
+
+ IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+
+ if (Entry == 0)
+ Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+
+ return Entry;
+}
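+
+// Usage sketch (assumes an LLVMContext 'Ctx'): the common widths come back as
+// the context's built-in singletons, and odd widths are created on demand and
+// uniqued, so repeated requests return the same object:
+//
+//   IntegerType::get(Ctx, 32) == Type::getInt32Ty(Ctx);   // always true
+//   IntegerType *A = IntegerType::get(Ctx, 17);
+//   IntegerType *B = IntegerType::get(Ctx, 17);
+//   // A == B: non-standard widths are uniqued per context as well.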
+
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+}
+
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
+ bool IsVarArgs)
+ : Type(Result->getContext(), FunctionTyID) {
+ Type **SubTys = reinterpret_cast<Type**>(this+1);
+ assert(isValidReturnType(Result) && "invalid return type for function");
+ setSubclassData(IsVarArgs);
+
+ SubTys[0] = const_cast<Type*>(Result);
+
+ for (unsigned i = 0, e = Params.size(); i != e; ++i) {
+ assert(isValidArgumentType(Params[i]) &&
+ "Not a valid type for function argument!");
+ SubTys[i+1] = Params[i];
+ }
+
+ ContainedTys = SubTys;
+ NumContainedTys = Params.size() + 1; // + 1 for result type
+}
+
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(const Type *ReturnType,
+ ArrayRef<Type*> Params, bool isVarArg) {
+ // TODO: This is brutally slow.
+ std::vector<Type*> Key;
+ Key.reserve(Params.size()+2);
+ Key.push_back(const_cast<Type*>(ReturnType));
+ for (unsigned i = 0, e = Params.size(); i != e; ++i)
+ Key.push_back(const_cast<Type*>(Params[i]));
+ if (isVarArg)
+ Key.push_back(0);
+
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+ FunctionType *&FT = pImpl->FunctionTypes[Key];
+
+ if (FT == 0) {
+ FT = (FunctionType*) pImpl->TypeAllocator.
+ Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+ AlignOf<FunctionType>::Alignment);
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ }
+
+ return FT;
+}
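+
+// Usage sketch (assumes an LLVMContext 'Ctx'): building the type of a
+// printf-like function, i32 (i8*, ...).  The result is uniqued, so an
+// identical request returns the same FunctionType pointer:
+//
+//   Type *ArgTys[] = { Type::getInt8PtrTy(Ctx) };
+//   FunctionType *FT =
+//       FunctionType::get(Type::getInt32Ty(Ctx), ArgTys, /*isVarArg=*/true);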
+
+
+FunctionType *FunctionType::get(const Type *Result, bool isVarArg) {
+ return get(Result, ArrayRef<Type *>(), isVarArg);
+}
+
+
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(const Type *RetTy) {
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
+}
+
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(const Type *ArgTy) {
+ return ArgTy->isFirstClassType();
+}
+
+//===----------------------------------------------------------------------===//
+// StructType Implementation
+//===----------------------------------------------------------------------===//
+
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
+ bool isPacked) {
+ // FIXME: std::vector is horribly inefficient for this probe.
+ std::vector<Type*> Key;
+ for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
+ assert(isValidElementType(ETypes[i]) &&
+ "Invalid type for structure element!");
+ Key.push_back(ETypes[i]);
+ }
+ if (isPacked)
+ Key.push_back(0);
+
+ StructType *&ST = Context.pImpl->AnonStructTypes[Key];
+ if (ST) return ST;
+
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct.
+ ST->setBody(ETypes, isPacked);
+ return ST;
+}
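+
+// Usage sketch (assumes an LLVMContext 'Ctx'): anonymous structs are uniqued
+// by their element list and packedness, so structurally identical requests
+// return the same StructType:
+//
+//   Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
+//   StructType *A = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   StructType *B = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   // A == B, and A->isAnonymous() is true.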
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+ assert(isOpaque() && "Struct body already set!");
+
+ setSubclassData(getSubclassData() | SCDB_HasBody);
+ if (isPacked)
+ setSubclassData(getSubclassData() | SCDB_Packed);
+
+ Type **Elts = getContext().pImpl->
+ TypeAllocator.Allocate<Type*>(Elements.size());
+ memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
+
+ ContainedTys = Elts;
+ NumContainedTys = Elements.size();
+}
+
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
+}
+
+void StructType::setName(StringRef Name) {
+ if (Name == getName()) return;
+
+ // If this struct already had a name, remove its symbol table entry.
+ if (SymbolTableEntry) {
+ getContext().pImpl->NamedStructTypes.erase(getName());
+ SymbolTableEntry = 0;
+ }
+
+ // If this is just removing the name, we're done.
+ if (Name.empty())
+ return;
+
+ // Look up the entry for the name.
+ StringMapEntry<StructType*> *Entry =
+ &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+
+ // While we have a name collision, rename by appending a unique numeric suffix.
+ if (Entry->getValue()) {
+ SmallString<64> TempStr(Name);
+ TempStr.push_back('.');
+ raw_svector_ostream TmpStream(TempStr);
+
+ do {
+ TempStr.resize(Name.size()+1);
+ TmpStream.resync();
+ TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+
+ Entry = &getContext().pImpl->
+ NamedStructTypes.GetOrCreateValue(TmpStream.str());
+ } while (Entry->getValue());
+ }
+
+ // Okay, we found an entry that isn't used. It's us!
+ Entry->setValue(this);
+
+ SymbolTableEntry = Entry;
+}
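+
+// Illustrative sketch of the collision handling above (assumes an LLVMContext
+// 'Ctx'); the exact numeric suffix depends on the context's counter, so the
+// "foo.0" shown here is only an example:
+//
+//   StructType *A = StructType::createNamed(Ctx, "foo");  // named "foo"
+//   StructType *B = StructType::createNamed(Ctx, "foo");  // renamed, e.g. "foo.0"
+//   // A->getName() == "foo", B->getName() != "foo".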
+
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
+
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+ return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+}
+
+StructType *StructType::get(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::get(Ctx, StructFields);
+}
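+
+// Usage sketch: this variadic overload walks the va_list until it sees a null
+// Type*, so the argument list must be terminated with NULL (assumes an
+// LLVMContext 'Ctx'):
+//
+//   StructType *Pair =
+//       StructType::get(Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx), NULL);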
+
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name,
+ ArrayRef<Type*> Elements, bool isPacked) {
+ StructType *ST = createNamed(Context, Name);
+ ST->setBody(Elements, isPacked);
+ return ST;
+}
+
+StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return createNamed(Elements[0]->getContext(), Name, Elements, isPacked);
+}
+
+StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::createNamed(Ctx, Name, StructFields);
+}
+
+StringRef StructType::getName() const {
+ assert(!isAnonymous() && "Anonymous structs never have names");
+ if (SymbolTableEntry == 0) return StringRef();
+
+ return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
+}
+
+void StructType::setBody(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ setBody(StructFields);
+}
+
+bool StructType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+/// isLayoutIdentical - Return true if this is layout identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(const StructType *Other) const {
+ if (this == Other) return true;
+
+ if (isPacked() != Other->isPacked() ||
+ getNumElements() != Other->getNumElements())
+ return false;
+
+ return std::equal(element_begin(), element_end(), Other->element_begin());
+}
+
+
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+ StringMap<StructType*>::iterator I =
+ getContext().pImpl->NamedStructTypes.find(Name);
+ if (I != getContext().pImpl->NamedStructTypes.end())
+ return I->second;
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CompositeType Implementation
+//===----------------------------------------------------------------------===//
+
+Type *CompositeType::getTypeAtIndex(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ // Structure indexes require 32-bit integer constants.
+ if (V->getType()->isIntegerTy(32))
+ if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+ return CU->getZExtValue() < STy->getNumElements();
+ return false;
+ }
+
+ // Sequential types can be indexed by any integer.
+ return V->getType()->isIntegerTy();
+}
+
+bool CompositeType::indexValid(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this))
+ return Idx < STy->getNumElements();
+ // Sequential types can be indexed by any integer.
+ return true;
+}
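+
+// Illustrative sketch of the indexing rules above (assumes an LLVMContext
+// 'Ctx'): struct indices must be in-range constants, while sequential types
+// accept any integer index and always yield the element type:
+//
+//   Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getDoubleTy(Ctx) };
+//   StructType *ST = StructType::get(Ctx, Elts, /*isPacked=*/false);
+//   ST->indexValid(1u);        // true
+//   ST->indexValid(2u);        // false: only two elements
+//   ST->getTypeAtIndex(1u);    // the double type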
+
+
+//===----------------------------------------------------------------------===//
+// ArrayType Implementation
+//===----------------------------------------------------------------------===//
+
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
+}
+
+
+ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(isValidElementType(ElementType) && "Invalid type for array element!");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ ArrayType *&Entry =
+ pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ return Entry;
+}
+
+bool ArrayType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+//===----------------------------------------------------------------------===//
+// VectorType Implementation
+//===----------------------------------------------------------------------===//
+
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
+}
+
+VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+ assert(isValidElementType(ElementType) &&
+ "Elements of a VectorType must be a primitive type");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ VectorType *&Entry = ElementType->getContext().pImpl
+ ->VectorTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ return Entry;
+}
+
+bool VectorType::isValidElementType(const Type *ElemTy) {
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
+}
+
+//===----------------------------------------------------------------------===//
+// PointerType Implementation
+//===----------------------------------------------------------------------===//
+
+PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) {
+ Type *EltTy = const_cast<Type*>(eltTy);
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+ : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+ if (Entry == 0)
+ Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ return Entry;
+}
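+
+// Usage sketch (assumes an LLVMContext 'Ctx'): pointer types are uniqued per
+// (pointee, address space) pair, with address space 0 kept in its own map as
+// the common case:
+//
+//   PointerType *P0 = Type::getInt8PtrTy(Ctx);     // i8* in addrspace(0)
+//   PointerType *P1 = Type::getInt8PtrTy(Ctx, 1);  // i8 addrspace(1)*
+//   // P0 != P1: different address spaces give distinct types.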
+
+
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+ setSubclassData(AddrSpace);
+}
+
+PointerType *Type::getPointerTo(unsigned addrs) const {
+ return PointerType::get(this, addrs);
+}
+
+bool PointerType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
+}
diff --git a/contrib/llvm/lib/VMCore/Use.cpp b/contrib/llvm/lib/VMCore/Use.cpp
new file mode 100644
index 000000000000..359a1517ab79
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Use.cpp
@@ -0,0 +1,144 @@
+//===-- Use.cpp - Implement the Use class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the algorithm for finding the User of a Use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Value.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Use swap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::swap(Use &RHS) {
+ Value *V1(Val);
+ Value *V2(RHS.Val);
+ if (V1 != V2) {
+ if (V1) {
+ removeFromList();
+ }
+
+ if (V2) {
+ RHS.removeFromList();
+ Val = V2;
+ V2->addUse(*this);
+ } else {
+ Val = 0;
+ }
+
+ if (V1) {
+ RHS.Val = V1;
+ V1->addUse(RHS);
+ } else {
+ RHS.Val = 0;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use getImpliedUser Implementation
+//===----------------------------------------------------------------------===//
+
+const Use *Use::getImpliedUser() const {
+ const Use *Current = this;
+
+ while (true) {
+ unsigned Tag = (Current++)->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ continue;
+
+ case stopTag: {
+ ++Current;
+ ptrdiff_t Offset = 1;
+ while (true) {
+ unsigned Tag = Current->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ ++Current;
+ Offset = (Offset << 1) + Tag;
+ continue;
+ default:
+ return Current + Offset;
+ }
+ }
+ }
+
+ case fullStopTag:
+ return Current;
+ }
+ }
+}
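+
+// A brief sketch of the "waymarking" scheme decoded above: the Uses of a User
+// are allocated in an array immediately before the User object, and instead of
+// storing a back-pointer in every Use, two spare bits of each Use's Prev
+// pointer hold a tag.  Walking towards higher addresses, a fullStopTag means
+// the User sits directly past the current Use, while a stopTag says that the
+// digit tags which follow encode, in binary with an implicit leading one, how
+// many Use slots remain between the end of that digit run and the User.
+// getImpliedUser() replays that encoding; initTags() below is what writes it
+// when a Use array is laid out.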
+
+//===----------------------------------------------------------------------===//
+// Use initTags Implementation
+//===----------------------------------------------------------------------===//
+
+Use *Use::initTags(Use * const Start, Use *Stop) {
+ ptrdiff_t Done = 0;
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ new(Stop) Use(tags[Done++]);
+ }
+
+ ptrdiff_t Count = Done;
+ while (Start != Stop) {
+ --Stop;
+ if (!Count) {
+ new(Stop) Use(stopTag);
+ ++Done;
+ Count = Done;
+ } else {
+ new(Stop) Use(PrevPtrTag(Count & 1));
+ Count >>= 1;
+ ++Done;
+ }
+ }
+
+ return Start;
+}
+
+//===----------------------------------------------------------------------===//
+// Use zap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::zap(Use *Start, const Use *Stop, bool del) {
+ while (Start != Stop)
+ (--Stop)->~Use();
+ if (del)
+ ::operator delete(Start);
+}
+
+//===----------------------------------------------------------------------===//
+// Use getUser Implementation
+//===----------------------------------------------------------------------===//
+
+User *Use::getUser() const {
+ const Use *End = getImpliedUser();
+ const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+ return ref->getInt()
+ ? ref->getPointer()
+ : (User*)End;
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/VMCore/User.cpp b/contrib/llvm/lib/VMCore/User.cpp
new file mode 100644
index 000000000000..f01fa349adfd
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/User.cpp
@@ -0,0 +1,79 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constant.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Replacing a value with itself is a no-op.
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesOfWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is this operand pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+ // the User.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
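+
+// Memory layout produced by the allocator above (sketch): the Use array is
+// co-allocated directly in front of the User object, which is what lets
+// Use::getUser() recover the User by walking forward through the waymark tags
+// instead of storing a per-Use back-pointer.
+//
+//   |--- Use[0] ... Use[Us-1] ---|--- User (and its subclass data) ---|
+//   ^ Start                      ^ End == pointer returned to the caller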
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ // If there were hung-off uses, they will have been freed already and
+ // NumOperands reset to 0, so here we just free the User itself.
+ ::operator delete(Storage);
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/VMCore/Value.cpp b/contrib/llvm/lib/VMCore/Value.cpp
new file mode 100644
index 000000000000..f1815e377edc
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Value.cpp
@@ -0,0 +1,632 @@
+//===-- Value.cpp - Implement the Value class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Value, ValueHandle, and User classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+static inline Type *checkType(const Type *Ty) {
+ assert(Ty && "Value defined with a null type: Error!");
+ return const_cast<Type*>(Ty);
+}
+
+Value::Value(const Type *ty, unsigned scid)
+ : SubclassID(scid), HasValueHandle(0),
+ SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
+ UseList(0), Name(0) {
+ // FIXME: Why isn't this in the subclass gunk??
+ if (isa<CallInst>(this) || isa<InvokeInst>(this))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+ "invalid CallInst type!");
+ else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
+ "Cannot create non-first-class values except for constants!");
+}
+
+Value::~Value() {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsDeleted(this);
+
+#ifndef NDEBUG // Only in +Asserts mode...
+ // Check to make sure that there are no uses of this value that are still
+ // around when the value is destroyed. If there are, then we have a dangling
+ // reference and something is wrong. This code is here to print out what is
+ // still being referenced. The value in question should be printed as
+ // a <badref>
+ //
+ if (!use_empty()) {
+ dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ dbgs() << "Use still stuck around after Def is destroyed:"
+ << **I << "\n";
+ }
+#endif
+ assert(use_empty() && "Uses remain when a value is destroyed!");
+
+ // If this value is named, destroy the name. This should not be in a symtab
+ // at this point.
+ if (Name)
+ Name->Destroy();
+
+ // There should be no uses of this object anymore, remove it.
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// hasNUses - Return true if this Value has exactly N users.
+///
+bool Value::hasNUses(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+ return UI == E;
+}
+
+/// hasNUsesOrMore - Return true if this value has N users or more. This is
+/// logically equivalent to getNumUses() >= N.
+///
+bool Value::hasNUsesOrMore(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+
+ return true;
+}
+
+/// isUsedInBasicBlock - Return true if this value is used in the specified
+/// basic block.
+bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const Instruction *User = dyn_cast<Instruction>(*I);
+ if (User && User->getParent() == BB)
+ return true;
+ }
+ return false;
+}
+
+
+/// getNumUses - This method computes the number of uses of this Value. This
+/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
+/// values.
+unsigned Value::getNumUses() const {
+ return (unsigned)std::distance(use_begin(), use_end());
+}
+
+static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
+ ST = 0;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (BasicBlock *P = I->getParent())
+ if (Function *PP = P->getParent())
+ ST = &PP->getValueSymbolTable();
+ } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ if (Function *P = BB->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (Module *P = GV->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (Argument *A = dyn_cast<Argument>(V)) {
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (isa<MDString>(V))
+ return true;
+ else {
+ assert(isa<Constant>(V) && "Unknown value type!");
+ return true; // no name is settable for this.
+ }
+ return false;
+}
+
+StringRef Value::getName() const {
+ // Make sure the empty string is still a C string. For historical reasons,
+ // some clients want to call .data() on the result and expect it to be null
+ // terminated.
+ if (!Name) return StringRef("", 0);
+ return Name->getKey();
+}
+
+std::string Value::getNameStr() const {
+ return getName().str();
+}
+
+void Value::setName(const Twine &NewName) {
+ // Fast path for common IRBuilder case of setName("") when there is no name.
+ if (NewName.isTriviallyEmpty() && !hasName())
+ return;
+
+ SmallString<256> NameData;
+ StringRef NameRef = NewName.toStringRef(NameData);
+
+ // Name isn't changing?
+ if (getName() == NameRef)
+ return;
+
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
+
+ // Get the symbol table to update for this object.
+ ValueSymbolTable *ST;
+ if (getSymTab(this, ST))
+ return; // Cannot set a name on this value (e.g. constant).
+
+ if (!ST) { // No symbol table to update? Just do the change.
+ if (NameRef.empty()) {
+ // Free the name for this value.
+ Name->Destroy();
+ Name = 0;
+ return;
+ }
+
+ if (Name)
+ Name->Destroy();
+
+ // NOTE: Could optimize the case where the name is shrinking by reusing the
+ // existing allocation instead of deallocating and then reallocating.
+
+ // Create the new name.
+ Name = ValueName::Create(NameRef.begin(), NameRef.end());
+ Name->setValue(this);
+ return;
+ }
+
+ // NOTE: Could optimize the case where the name is shrinking by reusing the
+ // existing allocation instead of deallocating and then reallocating.
+ if (hasName()) {
+ // Remove old name.
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+
+ if (NameRef.empty())
+ return;
+ }
+
+ // Name is changing to something new.
+ Name = ST->createValueName(NameRef, this);
+}
+
+
+/// takeName - transfer the name from V to this value, setting V's name to
+/// empty. It is an error to call V->takeName(V).
+void Value::takeName(Value *V) {
+ ValueSymbolTable *ST = 0;
+ // If this value has a name, drop it.
+ if (hasName()) {
+ // Get the symtab this is in.
+ if (getSymTab(this, ST)) {
+ // We can't set a name on this value, but we need to clear V's name if
+ // it has one.
+ if (V->hasName()) V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+
+ // Remove old name.
+ if (ST)
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+ }
+
+ // Now we know that this has no name.
+
+ // If V has no name either, we're done.
+ if (!V->hasName()) return;
+
+ // Get this's symtab if we didn't before.
+ if (!ST) {
+ if (getSymTab(this, ST)) {
+ // Clear V's name.
+ V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+ }
+
+ // Get V's ST; this should always succeed because V has a name.
+ ValueSymbolTable *VST;
+ bool Failure = getSymTab(V, VST);
+ assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
+
+ // If these values are both in the same symtab, we can do this very fast.
+ // This works even if both values have no symtab yet.
+ if (ST == VST) {
+ // Take the name!
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+ return;
+ }
+
+ // Otherwise, things are slightly more complex. Remove V's name from VST and
+ // then reinsert it into ST.
+
+ if (VST)
+ VST->removeValueName(V->Name);
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+
+ if (ST)
+ ST->reinsertValue(this);
+}
+
+
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsRAUWd(this, New);
+
+ while (!use_empty()) {
+ Use &U = *UseList;
+ // Must handle Constants specially; we cannot call replaceUsesOfWith on a
+ // constant because constants are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ C->replaceUsesOfWithOnConstant(this, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ }
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+ BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
+}
+
+Value *Value::stripPointerCasts() {
+ if (!getType()->isPointerTy())
+ return this;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
+ Value *V = this;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ return V;
+}
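+
+// Usage sketch: peeling casts off a pointer to reach the underlying object.
+// 'Ptr' here is a hypothetical Value* of pointer type, e.g. the pointer
+// operand of a load instruction:
+//
+//   Value *Underlying = Ptr->stripPointerCasts();
+//   // For "bitcast ([4 x i8]* @g to i8*)" this returns @g; a GEP with a
+//   // non-zero index or an overridable alias stops the walk instead.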
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ // It's also not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. Some cases could
+ // be handled using TargetData to check sizes and alignments though.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(this)) return true;
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are ok.
+ if (const Argument *A = dyn_cast<Argument>(this))
+ return A->hasByValAttr();
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!GEP->getOperand(0)->isDereferenceablePointer())
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ const Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+/// return the value in the PHI node corresponding to PredBB. If not, return
+/// ourself. This is useful if you want to know the value something has in a
+/// predecessor block.
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) {
+ PHINode *PN = dyn_cast<PHINode>(this);
+ if (PN && PN->getParent() == CurBB)
+ return PN->getIncomingValueForBlock(PredBB);
+ return this;
+}
+
+LLVMContext &Value::getContext() const { return VTy->getContext(); }
+
+//===----------------------------------------------------------------------===//
+// ValueHandleBase Class
+//===----------------------------------------------------------------------===//
+
+/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+/// List is known to point into the existing use list.
+void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
+ assert(List && "Handle list is null?");
+
+ // Splice ourselves into the list.
+ Next = *List;
+ *List = this;
+ setPrevPtr(List);
+ if (Next) {
+ Next->setPrevPtr(&Next);
+ assert(VP == Next->VP && "Added to wrong list?");
+ }
+}
+
+void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
+ assert(List && "Must insert after existing node");
+
+ Next = List->Next;
+ setPrevPtr(&List->Next);
+ List->Next = this;
+ if (Next)
+ Next->setPrevPtr(&Next);
+}
+
+/// AddToUseList - Add this ValueHandle to the use list for VP.
+void ValueHandleBase::AddToUseList() {
+ assert(VP && "Null pointer doesn't have a use list!");
+
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+
+ if (VP->HasValueHandle) {
+ // If this value already has a ValueHandle, then it must be in the
+ // ValueHandles map already.
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP];
+ assert(Entry != 0 && "Value doesn't have any handles?");
+ AddToExistingUseList(&Entry);
+ return;
+ }
+
+ // Ok, it doesn't have any handles yet, so we must insert it into the
+ // DenseMap. However, doing this insertion could cause the DenseMap to
+ // reallocate itself, which would invalidate all of the PrevP pointers that
+ // point into the old table. Handle this by checking for reallocation and
+ // updating the stale pointers only if needed.
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
+
+ ValueHandleBase *&Entry = Handles[VP];
+ assert(Entry == 0 && "Value really did already have handles?");
+ AddToExistingUseList(&Entry);
+ VP->HasValueHandle = true;
+
+ // If reallocation didn't happen or if this was the first insertion, don't
+ // walk the table.
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ Handles.size() == 1) {
+ return;
+ }
+
+ // Okay, reallocation did happen. Fix the Prev Pointers.
+ for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
+ E = Handles.end(); I != E; ++I) {
+ assert(I->second && I->first == I->second->VP && "List invariant broken!");
+ I->second->setPrevPtr(&I->second);
+ }
+}
+
+/// RemoveFromUseList - Remove this ValueHandle from its current use list.
+void ValueHandleBase::RemoveFromUseList() {
+ assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!");
+
+ // Unlink this from its use list.
+ ValueHandleBase **PrevPtr = getPrevPtr();
+ assert(*PrevPtr == this && "List invariant broken");
+
+ *PrevPtr = Next;
+ if (Next) {
+ assert(Next->getPrevPtr() == &Next && "List invariant broken");
+ Next->setPrevPtr(PrevPtr);
+ return;
+ }
+
+ // If the Next pointer was null, then it is possible that this was the last
+ // ValueHandle watching VP. If so, delete its entry from the ValueHandles
+ // map.
+ LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
+ Handles.erase(VP);
+ VP->HasValueHandle = false;
+ }
+}
+
+
+void ValueHandleBase::ValueIsDeleted(Value *V) {
+ assert(V->HasValueHandle && "Should only be called if ValueHandles present");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = V->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[V];
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+ // Note that we deliberately do not support the case where dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ break;
+ case Tracking:
+ // Mark that this value has been deleted by setting it to an invalid Value
+ // pointer.
+ Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
+ break;
+ case Weak:
+ // Weak just goes to null, which will unlink it from the list.
+ Entry->operator=(0);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->deleted();
+ break;
+ }
+ }
+
+ // All callbacks, weak references, and assertingVHs should be dropped by now.
+ if (V->HasValueHandle) {
+#ifndef NDEBUG // Only in +Asserts mode...
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ << "\n";
+ if (pImpl->ValueHandles[V]->getKind() == Assert)
+ llvm_unreachable("An asserting value handle still pointed to this"
+ " value!");
+
+#endif
+ llvm_unreachable("All references to V were not removed?");
+ }
+}
+
+
+void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
+ assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
+ assert(Old != New && "Changing value into itself!");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = Old->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[Old];
+
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ // Asserting handle does not follow RAUW implicitly.
+ break;
+ case Tracking:
+ // Tracking goes to new value like a WeakVH. Note that this may make it
+ // something incompatible with its templated type. We don't want to have a
+ // virtual (or inline) interface to handle this though, so instead we make
+ // the TrackingVH accessors guarantee that a client never sees this value.
+
+ // FALLTHROUGH
+ case Weak:
+ // Weak goes to the new value, which will unlink it from Old's list.
+ Entry->operator=(New);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getNameStr() << " to " << *New->getType() << " %"
+ << New->getNameStr() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
+}
+
+/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
+/// more than once.
+CallbackVH::~CallbackVH() {}
diff --git a/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp b/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp
new file mode 100644
index 000000000000..f1c970361a50
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ValueSymbolTable.cpp
@@ -0,0 +1,117 @@
+//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueSymbolTable class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "valuesymtab"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Class destructor
+ValueSymbolTable::~ValueSymbolTable() {
+#ifndef NDEBUG // Only do this in +Asserts mode...
+ for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
+ dbgs() << "Value still in symbol table! Type = '"
+ << *VI->getValue()->getType() << "' Name = '"
+ << VI->getKeyData() << "'\n";
+ assert(vmap.empty() && "Values remain in symbol table!");
+#endif
+}
+
+// Insert a value into the symbol table with the specified name...
+//
+void ValueSymbolTable::reinsertValue(Value* V) {
+ assert(V->hasName() && "Can't insert nameless Value into symbol table");
+
+ // Try inserting the name, assuming it won't conflict.
+ if (vmap.insert(V->Name)) {
+ //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
+ return;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
+
+ // The name is already used; just free it so we can allocate a new one.
+ V->Name->Destroy();
+
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(BaseSize);
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ V->Name = &NewName;
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return;
+ }
+ }
+}
+
+void ValueSymbolTable::removeValueName(ValueName *V) {
+ //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
+ // Remove the value from the symbol table.
+ vmap.remove(V);
+}
+
+/// createValueName - This method attempts to create a value name and insert
+/// it into the symbol table with the specified name. If it conflicts, it
+/// auto-renames the name and returns that instead.
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
+ // In the common case, the name is not already in the symbol table.
+ ValueName &Entry = vmap.GetOrCreateValue(Name);
+ if (Entry.getValue() == 0) {
+ Entry.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
+ // << *V << "\n");
+ return &Entry;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(Name.begin(), Name.end());
+
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(Name.size());
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return &NewName;
+ }
+ }
+}
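+
+// Illustrative sketch of the renaming loop above, for two hypothetical
+// instructions A and B in the same function (the suffix digits depend on the
+// table's LastUnique counter, so only the shape of the name is guaranteed):
+//
+//   A->setName("x");    // the symbol table now holds "x"
+//   B->setName("x");    // conflict: B is auto-renamed, e.g. to "x1"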
+
+
+// dump - print out the symbol table
+//
+void ValueSymbolTable::dump() const {
+ //DEBUG(dbgs() << "ValueSymbolTable:\n");
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
+ I->getValue()->dump();
+ //DEBUG(dbgs() << "\n");
+ }
+}
diff --git a/contrib/llvm/lib/VMCore/ValueTypes.cpp b/contrib/llvm/lib/VMCore/ValueTypes.cpp
new file mode 100644
index 000000000000..21a1f034446a
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/ValueTypes.cpp
@@ -0,0 +1,212 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods in the CodeGen/ValueTypes.h header.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 512;
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (const VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ assert(false && "Unrecognized extended type!");
+ return 0; // Suppress warnings.
+}
+
+/// getEVTString - This function returns the value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ return "?";
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v32i8: return "v32i8";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v16i16: return "v16i16";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v8i32: return "v8i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v8f32: return "v8f32";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v4f64: return "v4f64";
+ case MVT::Metadata:return "Metadata";
+ case MVT::untyped: return "untyped";
+ }
+}
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+}
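+
+// For illustration only (assuming an LLVMContext named Ctx): the switch above
+// maps a simple value type back to the corresponding IR type, e.g.
+//
+//   EVT(MVT::f64).getTypeForEVT(Ctx);    // Type::getDoubleTy(Ctx)
+//   EVT(MVT::v2i64).getTypeForEVT(Ctx);  // <2 x i64>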
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(const Type *Ty, bool HandleUnknown) {
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ return MVT::isVoid;
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
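+
+// Rough sketch of the forward mapping (Ctx stands in for an LLVMContext):
+// pointers always become the target-dependent MVT::iPTR, and integer widths
+// without a simple MVT become extended EVTs.
+//
+//   EVT::getEVT(Type::getInt32Ty(Ctx));                     // MVT::i32
+//   EVT::getEVT(IntegerType::get(Ctx, 42)).getEVTString();  // "i42"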
diff --git a/contrib/llvm/lib/VMCore/Verifier.cpp b/contrib/llvm/lib/VMCore/Verifier.cpp
new file mode 100644
index 000000000000..b146b896cbfb
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/Verifier.cpp
@@ -0,0 +1,1870 @@
+//===-- Verifier.cpp - Implement the Module Verifier -------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, that can be used for some
+// sanity checking of input to the system.
+//
+// Note that this does not provide full `Java style' security and verification;
+// instead it just tries to ensure that code is well-formed.
+//
+// * Both of a binary operator's parameters are of the same type
+// * Verify that the indices of mem access instructions match other operands
+// * Verify that arithmetic and other things are only performed on first-class
+// types. Verify that shifts & logicals only happen on integral types.
+// * All of the constants in a switch statement are of the correct type
+// * The code is in valid SSA form
+// * It should be illegal to put a label into any other type (like a structure)
+// or to return one. [except constant arrays!]
+// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
+// * PHI nodes must have an entry for each predecessor, with no extras.
+// * PHI nodes must be the first thing in a basic block, all grouped together
+// * PHI nodes must have at least one entry
+// * All basic blocks should only end with terminator insts, not contain them
+// * The entry node to a function must not have predecessors
+// * All Instructions must be embedded into a basic block
+// * Functions cannot take a void-typed parameter
+// * Verify that a function's argument list agrees with its declared type.
+// * It is illegal to specify a name for a void value.
+// * It is illegal to have an internal global value with no initializer
+// * It is illegal to have a ret instruction that returns a value that does not
+// agree with the function return value type.
+// * Function call argument types match the function prototype
+// * All other things that are tested by asserts spread about the code...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+namespace { // Anonymous namespace for class
+ struct PreVerifier : public FunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ PreVerifier() : FunctionPass(ID) {
+ initializePreVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Check that the prerequisites for successful DominatorTree construction
+ // are satisfied.
+ bool runOnFunction(Function &F) {
+ bool Broken = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ if (I->empty() || !I->back().isTerminator()) {
+ dbgs() << "Basic Block in function '" << F.getName()
+ << "' does not have terminator!\n";
+ WriteAsOperand(dbgs(), I, true);
+ dbgs() << "\n";
+ Broken = true;
+ }
+ }
+
+ if (Broken)
+ report_fatal_error("Broken module, no Basic Block terminator!");
+
+ return false;
+ }
+ };
+}
+
+char PreVerifier::ID = 0;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false)
+static char &PreVerifyID = PreVerifier::ID;
+
+namespace {
+ struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
+ static char ID; // Pass ID, replacement for typeid
+ bool Broken; // Is this module found to be broken?
+ bool RealPass; // Are we not being run by a PassManager?
+ VerifierFailureAction action;
+ // What to do if verification fails.
+ Module *Mod; // Module we are verifying right now
+ LLVMContext *Context; // Context within which we are verifying
+ DominatorTree *DT; // Dominator Tree, caution can be null!
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ /// InstsInThisBlock - when verifying a basic block, keep track of all of the
+ /// instructions we have seen so far. This allows us to do efficient
+ /// dominance checks for the case when an instruction has an operand that is
+ /// an instruction in the same block.
+ SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+
+ /// MDNodes - keep track of the metadata nodes that have been checked
+ /// already.
+ SmallPtrSet<MDNode *, 32> MDNodes;
+
+ Verifier()
+ : FunctionPass(ID),
+ Broken(false), RealPass(true), action(AbortProcessAction),
+ Mod(0), Context(0), DT(0), MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+ explicit Verifier(VerifierFailureAction ctn)
+ : FunctionPass(ID),
+ Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
+ MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) {
+ Mod = &M;
+ Context = &M.getContext();
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) {
+ // Get dominator information if we are being run by PassManager
+ if (RealPass) DT = &getAnalysis<DominatorTree>();
+
+ Mod = F.getParent();
+ if (!Context) Context = &F.getContext();
+
+ visit(F);
+ InstsInThisBlock.clear();
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+
+ return false;
+ }
+
+ bool doFinalization(Module &M) {
+ // Scan through, checking all of the external functions' linkage now...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ visitGlobalValue(*I);
+
+ // Check to make sure function prototypes are okay.
+ if (I->isDeclaration()) visitFunction(*I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ visitGlobalVariable(*I);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ visitGlobalAlias(*I);
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I)
+ visitNamedMDNode(*I);
+
+ // If the module is broken, abort at this time.
+ return abortIfBroken();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredID(PreVerifyID);
+ if (RealPass)
+ AU.addRequired<DominatorTree>();
+ }
+
+ /// abortIfBroken - If the module is broken and we are supposed to abort on
+ /// this condition, do so.
+ ///
+ bool abortIfBroken() {
+ if (!Broken) return false;
+ MessagesStr << "Broken module found, ";
+ switch (action) {
+ default: llvm_unreachable("Unknown action");
+ case AbortProcessAction:
+ MessagesStr << "compilation aborted!\n";
+ dbgs() << MessagesStr.str();
+ // Client should choose different reaction if abort is not desired
+ abort();
+ case PrintMessageAction:
+ MessagesStr << "verification continues.\n";
+ dbgs() << MessagesStr.str();
+ return false;
+ case ReturnStatusAction:
+ MessagesStr << "compilation terminated.\n";
+ return true;
+ }
+ }
+
+
+ // Verification methods...
+ void visitGlobalValue(GlobalValue &GV);
+ void visitGlobalVariable(GlobalVariable &GV);
+ void visitGlobalAlias(GlobalAlias &GA);
+ void visitNamedMDNode(NamedMDNode &NMD);
+ void visitMDNode(MDNode &MD, Function *F);
+ void visitFunction(Function &F);
+ void visitBasicBlock(BasicBlock &BB);
+ using InstVisitor<Verifier>::visit;
+
+ void visit(Instruction &I);
+
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitPHINode(PHINode &PN);
+ void visitBinaryOperator(BinaryOperator &B);
+ void visitICmpInst(ICmpInst &IC);
+ void visitFCmpInst(FCmpInst &FC);
+ void visitExtractElementInst(ExtractElementInst &EI);
+ void visitInsertElementInst(InsertElementInst &EI);
+ void visitShuffleVectorInst(ShuffleVectorInst &EI);
+ void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitInstruction(Instruction &I);
+ void visitTerminatorInst(TerminatorInst &I);
+ void visitBranchInst(BranchInst &BI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
+ void visitSelectInst(SelectInst &SI);
+ void visitUserOp1(Instruction &I);
+ void visitUserOp2(Instruction &I) { visitUserOp1(I); }
+ void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAllocaInst(AllocaInst &AI);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ void VerifyCallSite(CallSite CS);
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix);
+ void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned RetNum, unsigned ParamNum, ...);
+ void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V);
+ void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
+ const Value *V);
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ void WriteType(const Type *T) {
+ if (!T) return;
+ MessagesStr << ' ' << *T;
+ }
+
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1,
+ const Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Type *T1,
+ const Type *T2 = 0, const Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
+ };
+} // End anonymous namespace
+
+char Verifier::ID = 0;
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
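+
+// Typical usage, sketched here for orientation only; the entry points are the
+// ones declared in "llvm/Analysis/Verifier.h" (error handling elided):
+//
+//   std::string Err;
+//   if (verifyModule(M, ReturnStatusAction, &Err))  // true => module is broken
+//     errs() << Err;
+//
+//   PM.add(createVerifierPass());                   // or run it in a pipeline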
+
+// Assert - We know that cond should be true; if not, print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+void Verifier::visit(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ InstVisitor<Verifier>::visit(I);
+}
+
+
+void Verifier::visitGlobalValue(GlobalValue &GV) {
+ Assert1(!GV.isDeclaration() ||
+ GV.isMaterializable() ||
+ GV.hasExternalLinkage() ||
+ GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage() ||
+ (isa<GlobalAlias>(GV) &&
+ (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ "Global is external, but doesn't have external or dllimport or weak linkage!",
+ &GV);
+
+ Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
+ "Global is marked as dllimport, but not external", &GV);
+
+ Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
+
+ if (GV.hasAppendingLinkage()) {
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
+ }
+
+ Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
+ "linker_private_weak_def_auto can only have default visibility!",
+ &GV);
+}
+
+void Verifier::visitGlobalVariable(GlobalVariable &GV) {
+ if (GV.hasInitializer()) {
+ Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!", &GV);
+
+ // If the global has common linkage, it must have a zero initializer and
+ // cannot be constant.
+ if (GV.hasCommonLinkage()) {
+ Assert1(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ }
+ } else {
+ Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
+ }
+
+ if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+ GV.getName() == "llvm.global_dtors")) {
+ Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ // Don't worry about emitting an error for it not being an array,
+ // visitGlobalValue will complain on appending non-array.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ const PointerType *FuncPtrTy =
+ FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+ Assert1(STy && STy->getNumElements() == 2 &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
+ }
+ }
+
+ visitGlobalValue(GV);
+}
+
+void Verifier::visitGlobalAlias(GlobalAlias &GA) {
+ Assert1(!GA.getName().empty(),
+ "Alias name cannot be empty!", &GA);
+ Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
+ GA.hasWeakLinkage(),
+ "Alias should have external or external weak linkage!", &GA);
+ Assert1(GA.getAliasee(),
+ "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == GA.getAliasee()->getType(),
+ "Alias and aliasee types should match!", &GA);
+ Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
+
+ if (!isa<GlobalValue>(GA.getAliasee())) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
+ Assert1(CE &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ isa<GlobalValue>(CE->getOperand(0)),
+ "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+ &GA);
+ }
+
+ const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
+ Assert1(Aliasee,
+ "Aliasing chain should end with function or global variable", &GA);
+
+ visitGlobalValue(GA);
+}
+
+void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
+ MDNode *MD = NMD.getOperand(i);
+ if (!MD)
+ continue;
+
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
+ visitMDNode(*MD, 0);
+ }
+}
+
+void Verifier::visitMDNode(MDNode &MD, Function *F) {
+ // Only visit each node once. Metadata can be mutually recursive, so this
+ // avoids infinite recursion here, as well as being an optimization.
+ if (!MDNodes.insert(&MD))
+ return;
+
+ for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
+ Value *Op = MD.getOperand(i);
+ if (!Op)
+ continue;
+ if (isa<Constant>(Op) || isa<MDString>(Op))
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(Op)) {
+ Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
+ "Global metadata operand cannot be function local!", &MD, N);
+ visitMDNode(*N, F);
+ continue;
+ }
+ Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
+
+ // If this was an instruction, bb, or argument, verify that it is in the
+ // function that we expect.
+ Function *ActualF = 0;
+ if (Instruction *I = dyn_cast<Instruction>(Op))
+ ActualF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
+ ActualF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(Op))
+ ActualF = A->getParent();
+ assert(ActualF && "Unimplemented function local metadata case!");
+
+ Assert2(ActualF == F, "function-local metadata used in wrong function",
+ &MD, Op);
+ }
+}
+
+// VerifyParameterAttrs - Check the given attributes for an argument or return
+// value of the specified type. The value V is printed in error messages.
+void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V) {
+ if (Attrs == Attribute::None)
+ return;
+
+ Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
+ Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
+ " only applies to the function!", V);
+
+ if (isReturnValue) {
+ Attributes RetI = Attrs & Attribute::ParameterOnly;
+ Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
+ " does not apply to return values!", V);
+ }
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+
+ Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
+ Assert1(!TypeI, "Wrong type for attribute " +
+ Attribute::getAsString(TypeI), V);
+
+ Attributes ByValI = Attrs & Attribute::ByVal;
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ Assert1(!ByValI || PTy->getElementType()->isSized(),
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " does not support unsized types!", V);
+ } else {
+ Assert1(!ByValI,
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " only applies to parameters with pointer type!", V);
+ }
+}
+
+// VerifyFunctionAttrs - Check parameter attributes against a function type.
+// The value V is printed in error messages.
+void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
+ const AttrListPtr &Attrs,
+ const Value *V) {
+ if (Attrs.isEmpty())
+ return;
+
+ bool SawNest = false;
+
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &Attr = Attrs.getSlot(i);
+
+ const Type *Ty;
+ if (Attr.Index == 0)
+ Ty = FT->getReturnType();
+ else if (Attr.Index-1 < FT->getNumParams())
+ Ty = FT->getParamType(Attr.Index-1);
+ else
+ break; // VarArgs attributes, verified elsewhere.
+
+ VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
+
+ if (Attr.Attrs & Attribute::Nest) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ SawNest = true;
+ }
+
+ if (Attr.Attrs & Attribute::StructRet)
+ Assert1(Attr.Index == 1, "Attribute sret not on first parameter!", V);
+ }
+
+ Attributes FAttrs = Attrs.getFnAttributes();
+ Attributes NotFn = FAttrs & (~Attribute::FunctionOnly);
+ Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) +
+ " does not apply to the function!", V);
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+}
+
+static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
+ if (Attrs.isEmpty())
+ return true;
+
+ unsigned LastSlot = Attrs.getNumSlots() - 1;
+ unsigned LastIndex = Attrs.getSlot(LastSlot).Index;
+ if (LastIndex <= Params
+ || (LastIndex == (unsigned)~0
+ && (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params)))
+ return true;
+
+ return false;
+}
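+
+// Worked example (illustrative only): for a function type with two parameters
+// the attribute slots are indexed as
+//
+//   Index 0   -> return value attributes
+//   Index 1-2 -> parameter attributes
+//   Index ~0U -> function attributes
+//
+// so the last slot's index must be <= 2, or ~0U with the preceding slot
+// (if any) still <= 2.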
+
+// visitFunction - Verify that a function is ok.
+//
+void Verifier::visitFunction(Function &F) {
+ // Check function arguments.
+ const FunctionType *FT = F.getFunctionType();
+ unsigned NumArgs = F.arg_size();
+
+ Assert1(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
+
+ Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert2(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!",
+ &F, FT);
+ Assert1(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() ||
+ F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
+
+ Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
+
+ const AttrListPtr &Attrs = F.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attributes after last parameter!", &F);
+
+ // Check function attributes.
+ VerifyFunctionAttrs(FT, Attrs, &F);
+
+ // Check that this function meets the restrictions on this calling convention.
+ switch (F.getCallingConv()) {
+ default:
+ break;
+ case CallingConv::C:
+ break;
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::PTX_Kernel:
+ case CallingConv::PTX_Device:
+ Assert1(!F.isVarArg(),
+ "Varargs functions must have C calling conventions!", &F);
+ break;
+ }
+
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+
+ // Check that the argument values match the function type for this function...
+ unsigned i = 0;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++i) {
+ Assert2(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!",
+ I, FT->getParamType(i));
+ Assert1(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
+ if (!isLLVMdotName)
+ Assert2(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
+ }
+
+ if (F.isMaterializable()) {
+ // Function has a body somewhere we can't see.
+ } else if (F.isDeclaration()) {
+ Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
+ F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
+ } else {
+ // Verify that this function (which has a body) is not named "llvm.*". It
+ // is not legal to define intrinsics.
+ Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+
+ // Check the entry node
+ BasicBlock *Entry = &F.getEntryBlock();
+ Assert1(pred_begin(Entry) == pred_end(Entry),
+ "Entry block to function must not have predecessors!", Entry);
+
+ // The address of the entry block cannot be taken, unless it is dead.
+ if (Entry->hasAddressTaken()) {
+ Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
+ }
+ }
+
+ // If this function is actually an intrinsic, verify that it is only used in
+ // direct call/invokes, never having its "address taken".
+ if (F.getIntrinsicID()) {
+ const User *U;
+ if (F.hasAddressTaken(&U))
+ Assert1(0, "Invalid user of intrinsic instruction!", U);
+ }
+}
+
+// visitBasicBlock - Verify that a basic block is well formed...
+//
+void Verifier::visitBasicBlock(BasicBlock &BB) {
+ InstsInThisBlock.clear();
+
+ // Ensure that basic blocks have terminators!
+ Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+
+ // Check constraints that this basic block imposes on all of the PHI nodes in
+ // it.
+ if (isa<PHINode>(BB.front())) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
+ std::sort(Preds.begin(), Preds.end());
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+ // Ensure that PHI nodes have at least one entry!
+ Assert1(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!", PN);
+ Assert1(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!", PN);
+
+ // Get and sort all incoming values in the PHI node...
+ Values.clear();
+ Values.reserve(PN->getNumIncomingValues());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Values.push_back(std::make_pair(PN->getIncomingBlock(i),
+ PN->getIncomingValue(i)));
+ std::sort(Values.begin(), Values.end());
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ // Check to make sure that if there is more than one entry for a
+ // particular basic block in this PHI node, that the incoming values are
+ // all identical.
+ //
+ Assert4(i == 0 || Values[i].first != Values[i-1].first ||
+ Values[i].second == Values[i-1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!", PN, Values[i].first,
+ Values[i].second, Values[i-1].second);
+
+ // Check to make sure that the predecessors and PHI node entries are
+ // matched up.
+ Assert3(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
+ }
+ }
+ }
+}
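+
+// Illustrative IR fragment (hypothetical labels, not taken from any test):
+// with predecessors %a and %b, each PHI needs exactly one entry per
+// predecessor.
+//
+//   merge:
+//     %v = phi i32 [ 0, %a ], [ 1, %b ]   ; accepted
+//     %w = phi i32 [ 0, %a ]              ; rejected: no entry for %b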
+
+void Verifier::visitTerminatorInst(TerminatorInst &I) {
+ // Ensure that terminators only exist at the end of the basic block.
+ Assert1(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
+ visitInstruction(I);
+}
+
+void Verifier::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ Assert2(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ }
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitReturnInst(ReturnInst &RI) {
+ Function *F = RI.getParent()->getParent();
+ unsigned N = RI.getNumOperands();
+ if (F->getReturnType()->isVoidTy())
+ Assert2(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!", &RI, F->getReturnType());
+ else
+ Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+
+ // Check to make sure that the return value has necessary properties for
+ // terminators...
+ visitTerminatorInst(RI);
+}
+
+void Verifier::visitSwitchInst(SwitchInst &SI) {
+ // Check to make sure that all of the constants in the switch instruction
+ // have the same type as the switched-on value.
+ const Type *SwitchTy = SI.getCondition()->getType();
+ SmallPtrSet<ConstantInt*, 32> Constants;
+ for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
+ Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert2(Constants.insert(SI.getCaseValue(i)),
+ "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ }
+
+ visitTerminatorInst(SI);
+}
+
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitSelectInst(SelectInst &SI) {
+ Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
+
+ Assert1(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
+ visitInstruction(SI);
+}
+
+/// visitUserOp1 - User defined operators shouldn't live beyond the lifetime of
+/// a pass; if any exist, it's an error.
+///
+void Verifier::visitUserOp1(Instruction &I) {
+ Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+}
+
+void Verifier::visitTruncInst(TruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitZExtInst(ZExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSExtInst(SExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPTruncInst(FPTruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither",&I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPExtInst(FPExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitUIToFPInst(UIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "UIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSIToFPInst(SIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "SIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToUIInst(FPToUIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToSIInst(FPToSIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(),
+ "FPToSI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitBitCastInst(BitCastInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
+ "Bitcast requires both operands to be pointer or neither", &I);
+ Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
+
+ // Disallow aggregates.
+ Assert1(!SrcTy->isAggregateType(),
+ "Bitcast operand must not be aggregate", &I);
+ Assert1(!DestTy->isAggregateType(),
+ "Bitcast type must not be aggregate", &I);
+
+ visitInstruction(I);
+}
+
+/// visitPHINode - Ensure that a PHI node is well formed.
+///
+void Verifier::visitPHINode(PHINode &PN) {
+ // Ensure that the PHI nodes are all grouped together at the top of the block.
+ // This can be tested by checking whether the instruction before this is
+ // either nonexistent (because this is begin()) or is a PHI node. If not,
+ // then there is some other instruction before a PHI.
+ Assert2(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!",
+ &PN, PN.getParent());
+
+ // Check that all of the values of the PHI node have the same type as the
+ // result, and that the incoming blocks are really basic blocks.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
+ }
+
+ // All other PHI node constraints are checked in the visitBasicBlock method.
+
+ visitInstruction(PN);
+}
+
+void Verifier::VerifyCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
+ const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+
+ Assert1(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
+ const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+
+ // Verify that the correct number of arguments are being passed
+ if (FTy->isVarArg())
+ Assert1(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",I);
+ else
+ Assert1(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
+
+ // Verify that all arguments to the call match the function type.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
+
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attributes after last parameter!", I);
+
+ // Verify call attributes.
+ VerifyFunctionAttrs(FTy, Attrs, I);
+
+ if (FTy->isVarArg())
+ // Check attributes on the varargs part.
+ for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
+ Attributes Attr = Attrs.getParamAttributes(Idx);
+
+ VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
+
+ Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
+ Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +
+ " cannot be used for vararg call arguments!", I);
+ }
+
+ // Verify that there's no metadata unless it's a direct call to an intrinsic.
+ if (CS.getCalledFunction() == 0 ||
+ !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ for (FunctionType::param_iterator PI = FTy->param_begin(),
+ PE = FTy->param_end(); PI != PE; ++PI)
+ Assert1(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
+ }
+
+ visitInstruction(*I);
+}
+
+void Verifier::visitCallInst(CallInst &CI) {
+ VerifyCallSite(&CI);
+
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ visitIntrinsicFunctionCall(ID, CI);
+}
+
+void Verifier::visitInvokeInst(InvokeInst &II) {
+ VerifyCallSite(&II);
+ visitTerminatorInst(II);
+}
+
+/// visitBinaryOperator - Check that both arguments to the binary operator are
+/// of the same type!
+///
+void Verifier::visitBinaryOperator(BinaryOperator &B) {
+ Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
+
+ switch (B.getOpcode()) {
+ // Check that integer arithmetic operators are only used with
+ // integral operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Assert1(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that logical operators are only used with integral operands.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
+ break;
+ default:
+ llvm_unreachable("Unknown BinaryOperator opcode!");
+ }
+
+ visitInstruction(B);
+}
+
+void Verifier::visitICmpInst(ICmpInst &IC) {
+ // Check that the operands are the same type
+ const Type *Op0Ty = IC.getOperand(0)->getType();
+ const Type *Op1Ty = IC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
+
+ visitInstruction(IC);
+}
+
+void Verifier::visitFCmpInst(FCmpInst &FC) {
+ // Check that the operands are the same type
+ const Type *Op0Ty = FC.getOperand(0)->getType();
+ const Type *Op1Ty = FC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
+ visitInstruction(FC);
+}
+
+void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
+ Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
+ EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
+ visitInstruction(EI);
+}
+
+void Verifier::visitInsertElementInst(InsertElementInst &IE) {
+ Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
+ IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
+ visitInstruction(IE);
+}
+
+void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
+ Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
+ visitInstruction(SV);
+}
+
+void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ const Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
+ Idxs.begin(), Idxs.end());
+ Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ visitInstruction(GEP);
+}
+
+void Verifier::visitLoadInst(LoadInst &LI) {
+ const PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &LI);
+ const Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ visitInstruction(LI);
+}
+
+void Verifier::visitStoreInst(StoreInst &SI) {
+ const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ Assert1(PTy, "Store operand must be a pointer.", &SI);
+ const Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &SI, ElTy);
+ visitInstruction(SI);
+}
+
+void Verifier::visitAllocaInst(AllocaInst &AI) {
+ const PointerType *PTy = AI.getType();
+ Assert1(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+ &AI);
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ visitInstruction(AI);
+}
+
+void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
+ Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) ==
+ EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
+
+ visitInstruction(EVI);
+}
+
+void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
+ Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
+
+ visitInstruction(IVI);
+}
+
+/// visitInstruction - Verify that an instruction is well formed.
+///
+void Verifier::visitInstruction(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ Assert1(BB, "Instruction not embedded in basic block!", &I);
+
+ if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
+ }
+
+ // Check that void typed values don't have names
+ Assert1(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
+
+ // Check that the return value of the instruction is either void or a legal
+ // value type.
+ Assert1(I.getType()->isVoidTy() ||
+ I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
+
+ // Check that the instruction doesn't produce metadata. Calls are already
+ // checked against the callee type.
+ Assert1(!I.getType()->isMetadataTy() ||
+ isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
+
+ // Check that all uses of the instruction, if they are instructions
+ // themselves, actually have parent basic blocks. If the use is not an
+ // instruction, it is an error!
+ for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (Instruction *Used = dyn_cast<Instruction>(*UI))
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ else {
+ CheckFailed("Use of instruction is not an instruction!", *UI);
+ return;
+ }
+ }
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+
+ // Check to make sure that only first-class-values are operands to
+ // instructions.
+ if (!I.getOperand(i)->getType()->isFirstClassType()) {
+ Assert1(0, "Instruction operands must be first-class values!", &I);
+ }
+
+ if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // Check to make sure that the "address of" an intrinsic function is never
+ // taken.
+ Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert1(F->getParent() == Mod, "Referencing function in another module!",
+ &I);
+ } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
+ Assert1(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
+ } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
+ Assert1(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
+ Assert1(GV->getParent() == Mod, "Referencing global in another module!",
+ &I);
+ } else if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ BasicBlock *OpBlock = Op->getParent();
+
+ // Check that a definition dominates all of its uses.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+
+ Assert2(NormalDest != II->getUnwindDest(),
+ "No uses of invoke possible due to dominance structure!",
+ Op, &I);
+
+ // PHI nodes differ from other nodes because they actually "use" the
+ // value in the predecessor basic blocks they correspond to.
+ BasicBlock *UseBlock = BB;
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ UseBlock = PN->getIncomingBlock(j);
+ }
+ Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
+ Op, &I);
+
+ if (isa<PHINode>(I) && UseBlock == OpBlock) {
+ // Special case of a phi node in the normal destination or the unwind
+ // destination.
+ Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
+ "Invoke result not available in the unwind destination!",
+ Op, &I);
+ } else {
+ Assert2(DT->dominates(NormalDest, UseBlock) ||
+ !DT->isReachableFromEntry(UseBlock),
+ "Invoke result does not dominate all uses!", Op, &I);
+
+ // If the normal successor of an invoke instruction has multiple
+ // predecessors, then the normal edge from the invoke is critical,
+ // so the invoke value can only be live if the destination block
+ // dominates all of its predecessors (other than the invoke).
+ if (!NormalDest->getSinglePredecessor() &&
+ DT->isReachableFromEntry(UseBlock))
+ // If it is used by something non-phi, then the other case is that
+ // 'NormalDest' dominates all of its predecessors other than the
+ // invoke. In this case, the invoke value can still be used.
+ for (pred_iterator PI = pred_begin(NormalDest),
+ E = pred_end(NormalDest); PI != E; ++PI)
+ if (*PI != II->getParent() && !DT->dominates(NormalDest, *PI) &&
+ DT->isReachableFromEntry(*PI)) {
+ CheckFailed("Invoke result does not dominate all uses!", Op,&I);
+ return;
+ }
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
+ Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB)),
+ "Instruction does not dominate all uses!", Op, &I);
+ } else {
+ if (OpBlock == BB) {
+ // If they are in the same basic block, make sure that the definition
+ // comes before the use.
+ Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+
+ // Definition must dominate use unless use is unreachable!
+ Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, &I) ||
+ !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+ } else if (isa<InlineAsm>(I.getOperand(i))) {
+ Assert1((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
+ }
+ }
+ InstsInThisBlock.insert(&I);
+}
+
+// Flags used by TableGen to mark intrinsic parameters with the
+// LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes.
+static const unsigned ExtendedElementVectorType = 0x40000000;
+static const unsigned TruncatedElementVectorType = 0x20000000;
+
+/// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
+///
+void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+ Function *IF = CI.getCalledFunction();
+ Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
+
+#define GET_INTRINSIC_VERIFIER
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_VERIFIER
+
+ // If the intrinsic takes MDNode arguments, verify that they are either global
+ // or are local to *this* function.
+ for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
+ visitMDNode(*MD, CI.getParent()->getParent());
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::dbg_declare: { // llvm.dbg.declare
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
+ Assert1(MD->getNumOperands() == 1,
+ "invalid llvm.dbg.declare intrinsic call 2", &CI);
+ } break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
+ break;
+ case Intrinsic::gcroot:
+ case Intrinsic::gcwrite:
+ case Intrinsic::gcread:
+ if (ID == Intrinsic::gcroot) {
+ AllocaInst *AI =
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
+ Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getType()->getElementType()->isPointerTy()) {
+ Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.", &CI);
+ }
+ }
+
+ Assert1(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
+ break;
+ case Intrinsic::init_trampoline:
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
+ break;
+ case Intrinsic::prefetch:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch",
+ &CI);
+ break;
+ case Intrinsic::stackprotector:
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.",
+ &CI);
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
+ break;
+ case Intrinsic::invariant_end:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ break;
+ }
+}
+
+/// Produce a string to identify an intrinsic parameter or return value.
+/// The ArgNo value numbers the return values from 0 to NumRets-1 and the
+/// parameters beginning with NumRets.
+///
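+/// For example, with NumRets == 2, ArgNo 0 and 1 produce "Intrinsic result
+/// type #0" and "#1", while ArgNo 2 produces "Intrinsic parameter #0".
+///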
+static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
+ if (ArgNo >= NumRets)
+ return "Intrinsic parameter #" + utostr(ArgNo - NumRets);
+ if (NumRets == 1)
+ return "Intrinsic result type";
+ return "Intrinsic result type #" + utostr(ArgNo);
+}
+
+bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ unsigned NumElts = 0;
+ const Type *EltTy = Ty;
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ EltTy = VTy->getElementType();
+ NumElts = VTy->getNumElements();
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(RetTy);
+ unsigned NumRetVals;
+ if (RetTy->isVoidTy())
+ NumRetVals = 0;
+ else if (ST)
+ NumRetVals = ST->getNumElements();
+ else
+ NumRetVals = 1;
+
+ if (VT < 0) {
+ int Match = ~VT;
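+ // A negative VT encodes a reference to another slot: ~VT is the index of
+ // the return value (if less than NumRetVals) or parameter (otherwise) whose
+ // type this one must match, optionally OR'd with the vector-element flags
+ // defined above.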
+
+ // Check flags that indicate a type that is an integral vector type with
+ // elements that are larger or smaller than the elements of the matched
+ // type.
+ if ((Match & (ExtendedElementVectorType |
+ TruncatedElementVectorType)) != 0) {
+ const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
+ if (!VTy || !IEltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "an integral vector type.", F);
+ return false;
+ }
+ // Adjust the current Ty (in the opposite direction) rather than
+ // the type being matched against.
+ if ((Match & ExtendedElementVectorType) != 0) {
+ if ((IEltTy->getBitWidth() & 1) != 0) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " vector "
+ "element bit-width is odd.", F);
+ return false;
+ }
+ Ty = VectorType::getTruncatedElementVectorType(VTy);
+ } else
+ Ty = VectorType::getExtendedElementVectorType(VTy);
+ Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType);
+ }
+
+ if (Match <= static_cast<int>(NumRetVals - 1)) {
+ if (ST)
+ RetTy = ST->getElementType(Match);
+
+ if (Ty != RetTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
+ "match return type.", F);
+ return false;
+ }
+ } else {
+ if (Ty != FTy->getParamType(Match - NumRetVals)) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
+ "match parameter %" + utostr(Match - NumRetVals) + ".", F);
+ return false;
+ }
+ }
+ } else if (VT == MVT::iAny) {
+ if (!EltTy->isIntegerTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "an integer type.", F);
+ return false;
+ }
+
+ unsigned GotBits = cast<IntegerType>(EltTy)->getBitWidth();
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += "i" + utostr(GotBits);
+
+ // Check some constraints on various intrinsics.
+ switch (ID) {
+ default: break; // Not everything needs to be checked.
+ case Intrinsic::bswap:
+ if (GotBits < 16 || GotBits % 16 != 0) {
+ CheckFailed("Intrinsic requires even byte width argument", F);
+ return false;
+ }
+ break;
+ }
+ } else if (VT == MVT::fAny) {
+ if (!EltTy->isFloatingPointTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
+ "a floating-point type.", F);
+ return false;
+ }
+
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += EVT::getEVT(EltTy).getEVTString();
+ } else if (VT == MVT::vAny) {
+ if (!VTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a vector type.",
+ F);
+ return false;
+ }
+ Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
+ } else if (VT == MVT::iPTR) {
+ if (!Ty->isPointerTy()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (VT == MVT::iPTRAny) {
+ // Outside of TableGen, we don't distinguish iPTRAny (a pointer to any
+ // address space) from iPTR. In the verifier we cannot tell which case we
+ // have, so allow either to be legal.
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+ EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
+ if (PointeeVT == MVT::Other) {
+ CheckFailed("Intrinsic has pointer to complex type.");
+ return false;
+ }
+ Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+ PointeeVT.getEVTString();
+ } else {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (EVT((MVT::SimpleValueType)VT).isVector()) {
+ EVT VVT = EVT((MVT::SimpleValueType)VT);
+
+ // If this is a vector argument, verify the number and type of elements.
+ if (VVT.getVectorElementType() != EVT::getEVT(EltTy)) {
+ CheckFailed("Intrinsic prototype has incorrect vector element type!", F);
+ return false;
+ }
+
+ if (VVT.getVectorNumElements() != NumElts) {
+ CheckFailed("Intrinsic prototype has incorrect number of "
+ "vector elements!", F);
+ return false;
+ }
+ } else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) !=
+ EltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is wrong!", F);
+ return false;
+ } else if (EltTy != Ty) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is a vector "
+ "and a scalar is required.", F);
+ return false;
+ }
+
+ return true;
+}
+
+/// VerifyIntrinsicPrototype - TableGen emits calls to this function into
+/// Intrinsics.gen. It walks the variadic list of expected types and checks
+/// each return value and parameter of the intrinsic's prototype against them.
+void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned NumRetVals,
+ unsigned NumParams, ...) {
+ va_list VA;
+ va_start(VA, NumParams);
+ const FunctionType *FTy = F->getFunctionType();
+
+ // For overloaded intrinsics, the Suffix of the function name must match the
+ // types of the arguments. This variable keeps track of the expected
+ // suffix, to be checked at the end.
+ std::string Suffix;
+
+ if (FTy->getNumParams() + FTy->isVarArg() != NumParams) {
+ CheckFailed("Intrinsic prototype has incorrect number of arguments!", F);
+ return;
+ }
+
+ const Type *Ty = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(Ty);
+
+ if (NumRetVals == 0 && !Ty->isVoidTy()) {
+ CheckFailed("Intrinsic should return void", F);
+ return;
+ }
+
+ // Verify the return types.
+ if (ST && ST->getNumElements() != NumRetVals) {
+ CheckFailed("Intrinsic prototype has incorrect number of return types!", F);
+ return;
+ }
+
+ for (unsigned ArgNo = 0; ArgNo != NumRetVals; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (ST) Ty = ST->getElementType(ArgNo);
+ if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix))
+ break;
+ }
+
+ // Verify the parameter types.
+ for (unsigned ArgNo = 0; ArgNo != NumParams; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (VT == MVT::isVoid && ArgNo > 0) {
+ if (!FTy->isVarArg())
+ CheckFailed("Intrinsic prototype has no '...'!", F);
+ break;
+ }
+
+ if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT,
+ ArgNo + NumRetVals, Suffix))
+ break;
+ }
+
+ va_end(VA);
+
+ // For intrinsics without pointer arguments, a non-empty Suffix means the
+ // intrinsic is overloaded, so the function name must be correct: we append
+ // the suffix to the base intrinsic name and compare against the given
+ // function name. If they differ, the name is invalid. This keeps
+ // overloading of intrinsics on a sane, consistent naming convention. Note
+ // that intrinsics with pointer arguments may or may not be overloaded, so
+ // the check must allow for both a suffixed and an unsuffixed name.
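+ // For example, an integer-overloaded intrinsic instantiated at i32 computes
+ // the suffix ".i32" above, so a declaration such as llvm.bswap must actually
+ // be named llvm.bswap.i32.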
+ if (!Suffix.empty()) {
+ std::string Name(Intrinsic::getName(ID));
+ if (Name + Suffix != F->getName()) {
+ CheckFailed("Overloaded intrinsic has incorrect suffix: '" +
+ F->getName().substr(Name.length()) + "'. It should be '" +
+ Suffix + "'", F);
+ }
+ }
+
+ // Check parameter attributes.
+ Assert1(F->getAttributes() == Intrinsic::getAttributes(ID),
+ "Intrinsic has wrong parameter attributes!", F);
+}
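+
+// Illustrative only (not copied from Intrinsics.gen): for a hypothetical
+// overloaded intrinsic with one integer-any result and a single parameter
+// required to match that result, the generated check would have the shape
+//   VerifyIntrinsicPrototype(ID, IF, /*NumRetVals=*/1, /*NumParams=*/1,
+//                            MVT::iAny, ~0);
+// where the negative code ~0 asks PerformTypeCheck to match result #0.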
+
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
+ return new Verifier(action);
+}
+
+
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
+bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot verify external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Verifier *V = new Verifier(action);
+ FPM.add(V);
+ FPM.run(F);
+ return V->Broken;
+}
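+
+// Typical use of verifyFunction (illustrative; assumes the PrintMessageAction
+// failure mode and an already-defined Function *F):
+//   if (verifyFunction(*F, PrintMessageAction))
+//     errs() << "function failed verification\n";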
+
+/// verifyModule - Check a module for errors, printing messages on stderr.
+/// Return true if the module is corrupt.
+///
+bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
+ std::string *ErrorInfo) {
+ PassManager PM;
+ Verifier *V = new Verifier(action);
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo && V->Broken)
+ *ErrorInfo = V->MessagesStr.str();
+ return V->Broken;
+}
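+
+// Typical use of verifyModule (illustrative): run the verifier with
+// ReturnStatusAction and report the collected messages on failure:
+//   std::string Err;
+//   if (verifyModule(*M, ReturnStatusAction, &Err))
+//     errs() << Err;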